// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

//
//  This file was previously known as instrs.h
//
/*****************************************************************************
 *  x86 instructions for  the JIT compiler
 *
 *          id      -- the enum name for the instruction
 *          nm      -- textual name (for assembly display)
 *          um      -- update mode, see IUM_xx enum (rd, wr, or rw)
 *          mr      -- base encoding for R/M[reg] addressing mode
 *          mi      -- base encoding for R/M,icon addressing mode
 *          rm      -- base encoding for reg,R/M  addressing mode
 *          a4      -- base encoding for eax,i32  addressing mode
 *          rr      -- base encoding for register addressing mode
 *          tt      -- the tupletype for the instruction
 *          flags   -- flags, see INS_FLAGS_* enum
 *
******************************************************************************/

// clang-format off
// This table is only meaningful when TARGET_XARCH is defined; refuse to build otherwise.
#ifndef TARGET_XARCH
  #error Unexpected target type
#endif

// INST1 has no fallback definition below, so the including file must supply it.
#if !defined(INST1)
#error  At least INST1 must be defined before including this file.
#endif
/*****************************************************************************/
// Any other INSTn macro that the includer left undefined gets an empty expansion here,
// so table rows written with that macro produce no tokens.
// INST2..INST5 take that many base-encoding columns (mr, mi, rm, a4, rr in that order);
// INST0 takes only the mr column.
#if !defined(INST0)
#define INST0(id, nm, um, mr,                 tt, flags)
#endif
#if !defined(INST2)
#define INST2(id, nm, um, mr, mi,             tt, flags)
#endif
#if !defined(INST3)
#define INST3(id, nm, um, mr, mi, rm,         tt, flags)
#endif
#if !defined(INST4)
#define INST4(id, nm, um, mr, mi, rm, a4,     tt, flags)
#endif
#if !defined(INST5)
#define INST5(id, nm, um, mr, mi, rm, a4, rr, tt, flags)
#endif

/*****************************************************************************/
/*               The following is x86-specific                               */
/*****************************************************************************/

//    id                nm                  um      mr            mi            rm            a4            rr           tt              flags
// Placeholder row: every encoding column is BAD_CODE.
INST5(invalid,          "INVALID",          IUM_RD, BAD_CODE,     BAD_CODE,     BAD_CODE,     BAD_CODE,     BAD_CODE,    INS_TT_NONE,    INS_FLAGS_None)

// push has R/M (mr), immediate (mi) and register (rr) encodings; pop has no immediate form (mi is BAD_CODE).
INST5(push,             "push",             IUM_RD, 0x0030FE,     0x000068,     BAD_CODE,     BAD_CODE,     0x000050,    INS_TT_NONE,    INS_FLAGS_None)
INST5(pop,              "pop",              IUM_WR, 0x00008E,     BAD_CODE,     BAD_CODE,     BAD_CODE,     0x000058,    INS_TT_NONE,    INS_FLAGS_None)
// Does not affect the stack tracking in the emitter
// The *_hide rows repeat the push/pop encodings and flags exactly; only the id differs.
INST5(push_hide,        "push",             IUM_RD, 0x0030FE,     0x000068,     BAD_CODE,     BAD_CODE,     0x000050,    INS_TT_NONE,    INS_FLAGS_None)
INST5(pop_hide,         "pop",              IUM_WR, 0x00008E,     BAD_CODE,     BAD_CODE,     BAD_CODE,     0x000058,    INS_TT_NONE,    INS_FLAGS_None)

// None of the inc/dec rows lists CF among the flags written.
// The *_l variants keep the same mr encoding as inc/dec but use an rr encoding that carries a
// modrm-position byte (0x00C0FE / 0x00C8FE) instead of the single-byte 0x000040 / 0x000048,
// and they do not set INS_FLAGS_Has_Wbit.
INST5(inc,              "inc",              IUM_RW, 0x0000FE,     BAD_CODE,     BAD_CODE,     BAD_CODE,     0x000040,    INS_TT_NONE,    Writes_OF      | Writes_SF     | Writes_ZF     | Writes_AF     | Writes_PF | INS_FLAGS_Has_Wbit)
INST5(inc_l,            "inc",              IUM_RW, 0x0000FE,     BAD_CODE,     BAD_CODE,     BAD_CODE,     0x00C0FE,    INS_TT_NONE,    Writes_OF      | Writes_SF     | Writes_ZF     | Writes_AF     | Writes_PF)
INST5(dec,              "dec",              IUM_RW, 0x0008FE,     BAD_CODE,     BAD_CODE,     BAD_CODE,     0x000048,    INS_TT_NONE,    Writes_OF      | Writes_SF     | Writes_ZF     | Writes_AF     | Writes_PF | INS_FLAGS_Has_Wbit)
INST5(dec_l,            "dec",              IUM_RW, 0x0008FE,     BAD_CODE,     BAD_CODE,     BAD_CODE,     0x00C8FE,    INS_TT_NONE,    Writes_OF      | Writes_SF     | Writes_ZF     | Writes_AF     | Writes_PF)

// Multi-byte opcodes without modrm are represented in mixed endian fashion.
// See comment around quarter way through this file for more information.
INST5(bswap,            "bswap",            IUM_RW, 0x0F00C8,     BAD_CODE,     BAD_CODE,     BAD_CODE,     0x00C80F,    INS_TT_NONE,    INS_FLAGS_None)

// In the mi column, add/or/adc/sbb/and/sub/xor/cmp share the low opcode byte 0x80 and differ only
// in the modrm-position byte (0x00, 0x08, 0x10, ... 0x38, in row order).
// adc and sbb are the only rows here that also read a flag (Reads_CF).
//    id                nm                  um      mr            mi            rm            a4                         tt              flags
INST4(add,              "add",              IUM_RW, 0x000000,     0x000080,     0x000002,     0x000004,                  INS_TT_NONE,    Writes_OF      | Writes_SF     | Writes_ZF     | Writes_AF     | Writes_PF     | Writes_CF                  | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit)
INST4(or,               "or",               IUM_RW, 0x000008,     0x000880,     0x00000A,     0x00000C,                  INS_TT_NONE,    Resets_OF      | Writes_SF     | Writes_ZF     | Undefined_AF  | Writes_PF     | Resets_CF                  | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit)
INST4(adc,              "adc",              IUM_RW, 0x000010,     0x001080,     0x000012,     0x000014,                  INS_TT_NONE,    Writes_OF      | Writes_SF     | Writes_ZF     | Writes_AF     | Writes_PF     | Writes_CF     | Reads_CF   | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit)
INST4(sbb,              "sbb",              IUM_RW, 0x000018,     0x001880,     0x00001A,     0x00001C,                  INS_TT_NONE,    Writes_OF      | Writes_SF     | Writes_ZF     | Writes_AF     | Writes_PF     | Writes_CF     | Reads_CF   | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit)
INST4(and,              "and",              IUM_RW, 0x000020,     0x002080,     0x000022,     0x000024,                  INS_TT_NONE,    Resets_OF      | Writes_SF     | Writes_ZF     | Undefined_AF  | Writes_PF     | Resets_CF                  | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit)
INST4(sub,              "sub",              IUM_RW, 0x000028,     0x002880,     0x00002A,     0x00002C,                  INS_TT_NONE,    Writes_OF      | Writes_SF     | Writes_ZF     | Writes_AF     | Writes_PF     | Writes_CF                  | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit)
// Does not affect the stack tracking in the emitter
// Encodings and flags are identical to the sub row; only the id differs.
INST4(sub_hide,         "sub",              IUM_RW, 0x000028,     0x002880,     0x00002A,     0x00002C,                  INS_TT_NONE,    Writes_OF      | Writes_SF     | Writes_ZF     | Writes_AF     | Writes_PF     | Writes_CF                  | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit)

// cmp and test are IUM_RD. test sets INS_FLAGS_Has_Wbit but not INS_FLAGS_Has_Sbit, and its
// mr and rm encodings are the same value (0x000084). mov lists no EFLAGS effects.
INST4(xor,              "xor",              IUM_RW, 0x000030,     0x003080,     0x000032,     0x000034,                  INS_TT_NONE,    Resets_OF      | Writes_SF     | Writes_ZF     | Undefined_AF  | Writes_PF     | Resets_CF                  | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit)
INST4(cmp,              "cmp",              IUM_RD, 0x000038,     0x003880,     0x00003A,     0x00003C,                  INS_TT_NONE,    Writes_OF      | Writes_SF     | Writes_ZF     | Writes_AF     | Writes_PF     | Writes_CF                  | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit)
INST4(test,             "test",             IUM_RD, 0x000084,     0x0000F6,     0x000084,     0x0000A8,                  INS_TT_NONE,    Resets_OF      | Writes_SF     | Writes_ZF     | Undefined_AF  | Writes_PF     | Resets_CF                                       | INS_FLAGS_Has_Wbit)
INST4(mov,              "mov",              IUM_WR, 0x000088,     0x0000C6,     0x00008A,     0x0000B0,                  INS_TT_NONE,    INS_FLAGS_Has_Wbit)

// lea has only the reg,R/M (rm) encoding; every other encoding column is BAD_CODE.
INST4(lea,              "lea",              IUM_WR, BAD_CODE,     BAD_CODE,     0x00008D,     BAD_CODE,                  INS_TT_NONE,    INS_FLAGS_None)

//    id                nm                  um      mr            mi            rm                                       tt              flags

// Note that emitter has only partial support for BT. It can only emit the reg,reg form
// and the registers need to be reversed to get the correct encoding.
// The mr and rm columns hold the same value (0x0F00A3).
INST3(bt,               "bt",               IUM_RD, 0x0F00A3,     BAD_CODE,     0x0F00A3,                                INS_TT_NONE,    Undefined_OF   | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF)

// bsf/bsr: only the rm form is encoded; ZF is the only flag listed as written, the other five are Undefined.
INST3(bsf,              "bsf",              IUM_WR, BAD_CODE,     BAD_CODE,     0x0F00BC,                                INS_TT_NONE,    Undefined_OF   | Undefined_SF  | Writes_ZF     | Undefined_AF  | Undefined_PF  | Undefined_CF)
INST3(bsr,              "bsr",              IUM_WR, BAD_CODE,     BAD_CODE,     0x0F00BD,                                INS_TT_NONE,    Undefined_OF   | Undefined_SF  | Writes_ZF     | Undefined_AF  | Undefined_PF  | Undefined_CF)

// Extending moves: rm form only. movsxd exists only under TARGET_AMD64 and its encoding carries
// an extra 0x48 byte above the 32-bit opcode field — presumably a REX.W prefix; confirm against the emitter.
INST3(movsx,            "movsx",            IUM_WR, BAD_CODE,     BAD_CODE,     0x0F00BE,                                INS_TT_NONE,    INS_FLAGS_Has_Wbit)
#ifdef TARGET_AMD64
INST3(movsxd,           "movsxd",           IUM_WR, BAD_CODE,     BAD_CODE,     0x4800000063,                            INS_TT_NONE,    INS_FLAGS_Has_Wbit)
#endif
INST3(movzx,            "movzx",            IUM_WR, BAD_CODE,     BAD_CODE,     0x0F00B6,                                INS_TT_NONE,    INS_FLAGS_Has_Wbit)

// Conditional moves: only the rm form is encoded, using consecutive values 0x0F0040 through 0x0F004F.
// The flags column records which EFLAGS bits each row reads; none of these rows writes a flag.
INST3(cmovo,            "cmovo",            IUM_WR, BAD_CODE,     BAD_CODE,     0x0F0040,                                INS_TT_NONE,    Reads_OF)
INST3(cmovno,           "cmovno",           IUM_WR, BAD_CODE,     BAD_CODE,     0x0F0041,                                INS_TT_NONE,    Reads_OF)
INST3(cmovb,            "cmovb",            IUM_WR, BAD_CODE,     BAD_CODE,     0x0F0042,                                INS_TT_NONE,    Reads_CF)
INST3(cmovae,           "cmovae",           IUM_WR, BAD_CODE,     BAD_CODE,     0x0F0043,                                INS_TT_NONE,    Reads_CF)
INST3(cmove,            "cmove",            IUM_WR, BAD_CODE,     BAD_CODE,     0x0F0044,                                INS_TT_NONE,    Reads_ZF)
INST3(cmovne,           "cmovne",           IUM_WR, BAD_CODE,     BAD_CODE,     0x0F0045,                                INS_TT_NONE,    Reads_ZF)
INST3(cmovbe,           "cmovbe",           IUM_WR, BAD_CODE,     BAD_CODE,     0x0F0046,                                INS_TT_NONE,    Reads_ZF | Reads_CF)
INST3(cmova,            "cmova",            IUM_WR, BAD_CODE,     BAD_CODE,     0x0F0047,                                INS_TT_NONE,    Reads_ZF | Reads_CF)
INST3(cmovs,            "cmovs",            IUM_WR, BAD_CODE,     BAD_CODE,     0x0F0048,                                INS_TT_NONE,    Reads_SF)
INST3(cmovns,           "cmovns",           IUM_WR, BAD_CODE,     BAD_CODE,     0x0F0049,                                INS_TT_NONE,    Reads_SF)
INST3(cmovp,            "cmovp",            IUM_WR, BAD_CODE,     BAD_CODE,     0x0F004A,                                INS_TT_NONE,    Reads_PF)
INST3(cmovnp,           "cmovnp",           IUM_WR, BAD_CODE,     BAD_CODE,     0x0F004B,                                INS_TT_NONE,    Reads_PF)
INST3(cmovl,            "cmovl",            IUM_WR, BAD_CODE,     BAD_CODE,     0x0F004C,                                INS_TT_NONE,    Reads_OF       | Reads_SF)
INST3(cmovge,           "cmovge",           IUM_WR, BAD_CODE,     BAD_CODE,     0x0F004D,                                INS_TT_NONE,    Reads_OF       | Reads_SF)
INST3(cmovle,           "cmovle",           IUM_WR, BAD_CODE,     BAD_CODE,     0x0F004E,                                INS_TT_NONE,    Reads_OF       | Reads_SF      | Reads_ZF)
INST3(cmovg,            "cmovg",            IUM_WR, BAD_CODE,     BAD_CODE,     0x0F004F,                                INS_TT_NONE,    Reads_OF       | Reads_SF      | Reads_ZF)

// xchg: the mr and rm columns hold the same value (0x000086).
INST3(xchg,             "xchg",             IUM_RW, 0x000086,     BAD_CODE,     0x000086,                                INS_TT_NONE,    INS_FLAGS_Has_Wbit)
// NOTE(review): imul's mr value (0x0F00AC) differs from its rm value (0x0F00AF) — confirm whether
// the mr form is ever emitted and whether 0xAC is intended.
INST3(imul,             "imul",             IUM_RW, 0x0F00AC,     BAD_CODE,     0x0F00AF,                                INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Sbit)

//    id                nm                  um      mr            mi            rm                                       tt              flags

// Instead of encoding these as 3-operand instructions, we encode them
// as 2-operand instructions with the target register being implicit
// implicit_reg = op1*op2_icon
// INSTMUL is an alias for INST3, so these rows use the INST3 column layout.
#define INSTMUL INST3
// Only the mi column is populated. All rows share the low opcode byte 0x68 and differ only in the
// modrm-position byte, which steps by 0x08 per row (AX = 0x00 ... DI = 0x38).
// imul_SP has no encoding at all (every column is BAD_CODE).
INSTMUL(imul_AX,        "imul",             IUM_RD, BAD_CODE,     0x000068,     BAD_CODE,                                INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Sbit)
INSTMUL(imul_CX,        "imul",             IUM_RD, BAD_CODE,     0x000868,     BAD_CODE,                                INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Sbit)
INSTMUL(imul_DX,        "imul",             IUM_RD, BAD_CODE,     0x001068,     BAD_CODE,                                INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Sbit)
INSTMUL(imul_BX,        "imul",             IUM_RD, BAD_CODE,     0x001868,     BAD_CODE,                                INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Sbit)
INSTMUL(imul_SP,        "imul",             IUM_RD, BAD_CODE,     BAD_CODE,     BAD_CODE,                                INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Sbit)
INSTMUL(imul_BP,        "imul",             IUM_RD, BAD_CODE,     0x002868,     BAD_CODE,                                INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Sbit)
INSTMUL(imul_SI,        "imul",             IUM_RD, BAD_CODE,     0x003068,     BAD_CODE,                                INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Sbit)
INSTMUL(imul_DI,        "imul",             IUM_RD, BAD_CODE,     0x003868,     BAD_CODE,                                INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Sbit)

#ifdef TARGET_AMD64

// imul_08..imul_15 repeat the 0x00..0x38 modrm-position sequence with an extra 0x44 byte above the
// 32-bit opcode field — presumably a REX prefix selecting the extended registers; confirm against the emitter.
// Unlike the rows above, the row in the SP-equivalent position (imul_12) does have an encoding.
INSTMUL(imul_08,        "imul",             IUM_RD, BAD_CODE,     0x4400000068, BAD_CODE,                                INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Sbit)
INSTMUL(imul_09,        "imul",             IUM_RD, BAD_CODE,     0x4400000868, BAD_CODE,                                INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Sbit)
INSTMUL(imul_10,        "imul",             IUM_RD, BAD_CODE,     0x4400001068, BAD_CODE,                                INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Sbit)
INSTMUL(imul_11,        "imul",             IUM_RD, BAD_CODE,     0x4400001868, BAD_CODE,                                INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Sbit)
INSTMUL(imul_12,        "imul",             IUM_RD, BAD_CODE,     0x4400002068, BAD_CODE,                                INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Sbit)
INSTMUL(imul_13,        "imul",             IUM_RD, BAD_CODE,     0x4400002868, BAD_CODE,                                INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Sbit)
INSTMUL(imul_14,        "imul",             IUM_RD, BAD_CODE,     0x4400003068, BAD_CODE,                                INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Sbit)
INSTMUL(imul_15,        "imul",             IUM_RD, BAD_CODE,     0x4400003868, BAD_CODE,                                INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Sbit)

#endif // TARGET_AMD64

// the hex codes in this file represent the instruction encoding as follows:
// 0x0000ff00 - modrm byte position
// 0x000000ff - last byte of opcode (before modrm)
// 0x00ff0000 - first byte of opcode
// 0xff000000 - middle byte of opcode, if needed (after first, before last)
//
// So a 1-byte opcode is:      and with modrm:
//             0x00000011          0x0000RM11
//
// So a 2-byte opcode is:      and with modrm:
//             0x00002211          0x0011RM22
//
// So a 3-byte opcode is:      and with modrm:
//             0x00113322          0x2211RM33
//
// So a 4-byte opcode would be something like this:
//             0x22114433

// Opcode packing helpers. Each one drops its arguments into the byte lanes described by the
// encoding-layout comment above: first opcode byte -> bits 16-23, middle byte -> bits 24-31,
// last opcode byte -> bits 0-7.
#define PACK2(first, last)          (((first) << 16) | (last))
#define PACK3(first, middle, last)  (((middle) << 24) | ((first) << 16) | (last))

// Shorthands with a fixed leading byte sequence; c is the final opcode byte.
#define PCKFLT(c) PACK2(0x0f, c)            // 0F c
#define PCKDBL(c) PACK3(0x66, 0x0f, c)      // 66 0F c
#define SSEFLT(c) PACK3(0xf3, 0x0f, c)      // F3 0F c
#define SSEDBL(c) PACK3(0xf2, 0x0f, c)      // F2 0F c
#define PCKMVB(c) PACK3(0x0F, 0x38, c)      // 0F 38 c

// Four-byte form: the third byte takes the low lane and the fourth byte occupies bits 8-15,
// the lane that otherwise holds the modrm byte. The wrappers below supply the third byte themselves.
#define PACK4(first, middle, third, fourth) (((fourth) << 8) | (third) | ((middle) << 24) | ((first) << 16))

#define PSSE38(p, c)     PACK4(p, 0x0f, 0x38, c)    // p 0F 38 c
#define PSSE3A(p, c)     PACK4(p, 0x0f, 0x3A, c)    // p 0F 3A c

#define SSE38(c)         PSSE38(0x66, c)            // 66 0F 38 c
#define SSE3A(c)         PSSE3A(0x66, c)            // 66 0F 3A c

// VEX* take a selector for the implied leading opcode bytes in c1
// (1: implied 0f, 2: implied 0f 38, 3: implied 0f 3a).
// c2 lands in the low byte for VEX2INT and in bits 8-15 for VEX3INT/VEX3FLT.
// VEX3INT and VEX3FLT currently expand to the same value.
// NOTE(review): with a 32-bit int, 0xc5 << 24 sets the sign bit — confirm includers widen these as intended.
#define VEX2INT(c1, c2)  PACK3(c1, 0xc5, c2)
#define VEX3INT(c1, c2)  PACK4(c1, 0xc5, 0x02, c2)
#define VEX3FLT(c1, c2)  PACK4(c1, 0xc5, 0x02, c2)

// Row with no encodings (all BAD_CODE). By its name it marks where the SSE rows begin — TODO confirm how includers use it.
INST3(FIRST_SSE_INSTRUCTION, "FIRST_SSE_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)

// SSE
// Rows in this group take their encodings from PCKFLT(...) or SSEFLT(...); prefetch* and sfence use raw constants.
// The tt column holds an INS_TT_* value and the flags column combines Input_*/REX_*/Encoding_* bits with
// any operand-form and EFLAGS annotations.
INST3(addps,            "addps",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x58),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Add packed singles
INST3(addss,            "addss",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x58),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Add scalar singles
INST3(andnps,           "andnps",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x55),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // And-Not packed singles
INST3(andps,            "andps",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x54),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // AND packed singles
// cmpps/cmpss set Encoding_VEX but not Encoding_EVEX, and use REX_WIG with INS_TT_NONE.
INST3(cmpps,            "cmpps",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0xC2),                            INS_TT_NONE,                         Input_32Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // compare packed singles
INST3(cmpss,            "cmpss",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0xC2),                            INS_TT_NONE,                         Input_32Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // compare scalar singles
// comiss (and ucomiss further down) are the only rows in this group that list EFLAGS effects.
INST3(comiss,           "comiss",           IUM_RD, BAD_CODE,     BAD_CODE,     PCKFLT(0x2F),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX                                        | Resets_OF    | Resets_SF    | Writes_ZF    | Resets_AF    | Writes_PF    | Writes_CF)    // ordered compare singles
// cvtsi2ss32/cvtsi2ss64 share mnemonic and encoding; they differ in Input_32Bit/REX_W0 versus Input_64Bit/REX_W1.
INST3(cvtsi2ss32,       "cvtsi2ss",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x2A),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // cvt DWORD to scalar single
INST3(cvtsi2ss64,       "cvtsi2ss",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x2A),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // cvt QWORD to scalar single
INST3(cvtss2si,         "cvtss2si",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x2D),                            INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_WX       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt scalar single to DWORD/QWORD
INST3(cvttss2si,        "cvttss2si",        IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x2C),                            INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_WX       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc scalar single to DWORD
INST3(divps,            "divps",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x5E),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Divide packed singles
INST3(divss,            "divss",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x5E),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Divide scalar singles
INST3(maxps,            "maxps",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x5F),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Return Maximum packed singles
INST3(maxss,            "maxss",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x5F),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Return Maximum scalar single
INST3(minps,            "minps",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x5D),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Return Minimum packed singles
INST3(minss,            "minss",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x5D),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Return Minimum scalar single
// The mov* rows that populate mr as well as rm use adjacent opcode values (mr = rm + 1).
// movhlps shares its rm encoding with movlps (PCKFLT(0x12)), and movlhps with movhps (PCKFLT(0x16));
// the encodings alone do not distinguish those pairs.
INST3(movaps,           "movaps",           IUM_WR, PCKFLT(0x29), BAD_CODE,     PCKFLT(0x28),                            INS_TT_FULL_MEM,                     Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)
INST3(movhlps,          "movhlps",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x12),                            INS_TT_NONE,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)
INST3(movhps,           "movhps",           IUM_WR, PCKFLT(0x17), BAD_CODE,     PCKFLT(0x16),                            INS_TT_TUPLE2,                       Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)
INST3(movlhps,          "movlhps",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x16),                            INS_TT_NONE,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)
INST3(movlps,           "movlps",           IUM_WR, PCKFLT(0x13), BAD_CODE,     PCKFLT(0x12),                            INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)
INST3(movmskps,         "movmskps",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x50),                            INS_TT_NONE,                                          REX_WIG      | Encoding_VEX)
// movntps has only the mr form.
INST3(movntps,          "movntps",          IUM_WR, PCKFLT(0x2B), BAD_CODE,     BAD_CODE,                                INS_TT_FULL_MEM,                     Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)
INST3(movss,            "movss",            IUM_WR, SSEFLT(0x11), BAD_CODE,     SSEFLT(0x10),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)
INST3(movups,           "movups",           IUM_WR, PCKFLT(0x11), BAD_CODE,     PCKFLT(0x10),                            INS_TT_FULL_MEM,                     Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)
INST3(mulps,            "mulps",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x59),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Multiply packed singles
INST3(mulss,            "mulss",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x59),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Multiply scalar single
INST3(orps,             "orps",             IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x56),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Or packed singles
// prefetch* rows have only an mr encoding, written as a raw constant; the four values differ only in the
// modrm-position byte (0x00, 0x08, 0x10, 0x18). Like sfence, they set neither Encoding_VEX nor Encoding_EVEX.
INST3(prefetchnta,      "prefetchnta",      IUM_RD, 0x000F0018,   BAD_CODE,     BAD_CODE,                                INS_TT_NONE,                                          REX_WIG)
INST3(prefetcht0,       "prefetcht0",       IUM_RD, 0x000F0818,   BAD_CODE,     BAD_CODE,                                INS_TT_NONE,                                          REX_WIG)
INST3(prefetcht1,       "prefetcht1",       IUM_RD, 0x000F1018,   BAD_CODE,     BAD_CODE,                                INS_TT_NONE,                                          REX_WIG)
INST3(prefetcht2,       "prefetcht2",       IUM_RD, 0x000F1818,   BAD_CODE,     BAD_CODE,                                INS_TT_NONE,                                          REX_WIG)
// rcp*/rsqrt* set Encoding_VEX but not Encoding_EVEX; the scalar forms add INS_Flags_IsDstSrcSrcAVXInstruction.
INST3(rcpps,            "rcpps",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x53),                            INS_TT_NONE,                                          REX_WIG      | Encoding_VEX)                                                                                                                                                   // Reciprocal of packed singles
INST3(rcpss,            "rcpss",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x53),                            INS_TT_NONE,                                          REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Reciprocal of scalar single
INST3(rsqrtps,          "rsqrtps",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x52),                            INS_TT_NONE,                                          REX_WIG      | Encoding_VEX)                                                                                                                                                   // Reciprocal Sqrt of packed singles
INST3(rsqrtss,          "rsqrtss",          IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x52),                            INS_TT_NONE,                                          REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Reciprocal Sqrt of scalar single
INST3(shufps,           "shufps",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0xC6),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)
// sfence is a raw mr constant whose modrm-position byte is fixed at 0xF8.
INST3(sfence,           "sfence",           IUM_RD, 0x000FF8AE,   BAD_CODE,     BAD_CODE,                                INS_TT_NONE,                                          REX_WIG)
INST3(sqrtps,           "sqrtps",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x51),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Sqrt of packed singles
INST3(sqrtss,           "sqrtss",           IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x51),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Sqrt of scalar single
INST3(subps,            "subps",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x5C),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Subtract packed singles
INST3(subss,            "subss",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x5C),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Subtract scalar singles
// ucomiss carries the same flag annotations as comiss; only the opcode byte differs (0x2E vs 0x2F).
INST3(ucomiss,          "ucomiss",          IUM_RD, BAD_CODE,     BAD_CODE,     PCKFLT(0x2E),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX                                        | Resets_OF    | Resets_SF    | Writes_ZF    | Resets_AF    | Writes_PF    | Writes_CF)    // unordered compare singles
INST3(unpckhps,         "unpckhps",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x15),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)
INST3(unpcklps,         "unpcklps",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x14),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)
INST3(xorps,            "xorps",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x57),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // XOR packed singles

// SSE2
INST3(addpd,            "addpd",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x58),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Add packed doubles
INST3(addsd,            "addsd",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x58),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Add scalar doubles
INST3(andnpd,           "andnpd",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x55),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // And-Not packed doubles
INST3(andpd,            "andpd",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x54),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // AND packed doubles
INST3(cmppd,            "cmppd",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xC2),                            INS_TT_NONE,                         Input_64Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // compare packed doubles
INST3(cmpsd,            "cmpsd",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0xC2),                            INS_TT_NONE,                         Input_64Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // compare scalar doubles
INST3(comisd,           "comisd",           IUM_RD, BAD_CODE,     BAD_CODE,     PCKDBL(0x2F),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX                                        | Resets_OF    | Resets_SF    | Writes_ZF    | Resets_AF    | Writes_PF    | Writes_CF)    // ordered compare doubles
INST3(cvtdq2pd,         "cvtdq2pd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0xE6),                            INS_TT_HALF,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt packed DWORDs to doubles
INST3(cvtdq2ps,         "cvtdq2ps",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x5B),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt packed DWORDs to singles
INST3(cvtpd2dq,         "cvtpd2dq",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0xE6),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt packed doubles to DWORDs
INST3(cvtpd2ps,         "cvtpd2ps",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x5A),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt packed doubles to singles
INST3(cvtps2dq,         "cvtps2dq",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x5B),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt packed singles to DWORDs
INST3(cvtps2pd,         "cvtps2pd",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x5A),                            INS_TT_HALF,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt packed singles to doubles
INST3(cvtsd2si,         "cvtsd2si",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x2D),                            INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_WX       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt scalar double to DWORD
INST3(cvtsd2ss,         "cvtsd2ss",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x5A),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // cvt scalar double to scalar singles
INST3(cvtsi2sd32,       "cvtsi2sd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x2A),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // cvt DWORD to scalar double
INST3(cvtsi2sd64,       "cvtsi2sd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x2A),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // cvt QWORD to scalar double
INST3(cvtss2sd,         "cvtss2sd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x5A),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // cvt scalar single to scalar doubles
INST3(cvttpd2dq,        "cvttpd2dq",        IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xE6),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc packed doubles to DWORDs
INST3(cvttps2dq,        "cvttps2dq",        IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x5B),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc packed singles to DWORDs
INST3(cvttsd2si,        "cvttsd2si",        IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x2C),                            INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_WX       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc scalar double to signed DWORDs
INST3(divpd,            "divpd",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x5E),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Divide packed doubles
INST3(divsd,            "divsd",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x5E),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Divide scalar doubles
INST3(lfence,           "lfence",           IUM_RD, 0x000FE8AE,   BAD_CODE,     BAD_CODE,                                INS_TT_NONE,                                          REX_WIG)                                                                                                                                                                       // Load fence
INST3(maskmovdqu,       "maskmovdqu",       IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xF7),                            INS_TT_NONE,                                          REX_WIG)                                                                                                                                                                       // Store selected bytes of an xmm reg to memory using a byte mask
INST3(maxpd,            "maxpd",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x5F),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Return Maximum packed doubles
INST3(maxsd,            "maxsd",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x5F),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Return Maximum scalar double
INST3(mfence,           "mfence",           IUM_RD, 0x000FF0AE,   BAD_CODE,     BAD_CODE,                                INS_TT_NONE,                                          REX_WIG)
INST3(minpd,            "minpd",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x5D),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Return Minimum packed doubles
INST3(minsd,            "minsd",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x5D),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Return Minimum scalar double
INST3(movapd,           "movapd",           IUM_WR, PCKDBL(0x29), BAD_CODE,     PCKDBL(0x28),                            INS_TT_FULL_MEM,                     Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Move aligned packed doubles
INST3(movd,             "movd",             IUM_WR, PCKDBL(0x7E), BAD_CODE,     PCKDBL(0x6E),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_WX       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Move DWORD/QWORD between xmm regs <-> memory/r32/r64 regs
INST3(movdqa,           "movdqa",           IUM_WR, PCKDBL(0x7F), BAD_CODE,     PCKDBL(0x6F),                            INS_TT_FULL_MEM,                     Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Move aligned packed integers
INST3(movdqu,           "movdqu",           IUM_WR, SSEFLT(0x7F), BAD_CODE,     SSEFLT(0x6F),                            INS_TT_FULL_MEM,                     Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Move unaligned packed integers
INST3(movhpd,           "movhpd",           IUM_WR, PCKDBL(0x17), BAD_CODE,     PCKDBL(0x16),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Move high packed double (upper 64 bits of xmm <-> memory)
INST3(movlpd,           "movlpd",           IUM_WR, PCKDBL(0x13), BAD_CODE,     PCKDBL(0x12),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Move low packed double (lower 64 bits of xmm <-> memory)
INST3(movmskpd,         "movmskpd",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x50),                            INS_TT_NONE,                                          REX_WIG      | Encoding_VEX)                                                                                                                                                   // Extract 2-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros.
INST3(movntdq,          "movntdq",          IUM_WR, PCKDBL(0xE7), BAD_CODE,     BAD_CODE,                                INS_TT_FULL_MEM,                     Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Store packed integers using non-temporal hint
INST3(movnti,           "movnti",           IUM_WR, PCKFLT(0xC3), BAD_CODE,     BAD_CODE,                                INS_TT_NONE,                                          REX_WX)                                                                                                                                                                        // Store DWORD/QWORD from an integer reg using non-temporal hint
INST3(movntpd,          "movntpd",          IUM_WR, PCKDBL(0x2B), BAD_CODE,     BAD_CODE,                                INS_TT_FULL_MEM,                     Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Store packed doubles using non-temporal hint
INST3(movq,             "movq",             IUM_WR, PCKDBL(0xD6), BAD_CODE,     SSEFLT(0x7E),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Move Quadword between memory/mm <-> regs
INST3(movsd_simd,       "movsd",            IUM_WR, SSEDBL(0x11), BAD_CODE,     SSEDBL(0x10),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Move scalar double
INST3(movupd,           "movupd",           IUM_WR, PCKDBL(0x11), BAD_CODE,     PCKDBL(0x10),                            INS_TT_FULL_MEM,                     Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Move unaligned packed doubles
INST3(mulpd,            "mulpd",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x59),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Multiply packed doubles
INST3(mulsd,            "mulsd",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x59),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Multiply scalar doubles
INST3(orpd,             "orpd",             IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x56),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Or packed doubles
INST3(packssdw,         "packssdw",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x6B),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Pack (narrow) int to short with saturation
INST3(packsswb,         "packsswb",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x63),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Pack (narrow) short to byte with saturation
INST3(packuswb,         "packuswb",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x67),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Pack (narrow) short to unsigned byte with saturation
INST3(paddb,            "paddb",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xFC),                            INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Add packed byte integers
INST3(paddd,            "paddd",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xFE),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Add packed double-word (32-bit) integers
INST3(paddq,            "paddq",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xD4),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Add packed quad-word (64-bit) integers
INST3(paddsb,           "paddsb",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xEC),                            INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Add packed signed byte integers and saturate the results
INST3(paddsw,           "paddsw",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xED),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Add packed signed word integers and saturate the results
INST3(paddusb,          "paddusb",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xDC),                            INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Add packed unsigned byte integers and saturate the results
INST3(paddusw,          "paddusw",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xDD),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Add packed unsigned word integers and saturate the results
INST3(paddw,            "paddw",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xFD),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Add packed word (16-bit) integers
INST3(pand,             "pand",             IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xDB),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Packed bit-wise AND of two xmm regs
INST3(pandn,            "pandn",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xDF),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Packed bit-wise AND NOT of two xmm regs
INST3(pavgb,            "pavgb",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xE0),                            INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Average of packed byte integers
INST3(pavgw,            "pavgw",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xE3),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Average of packed word integers
INST3(pcmpeqb,          "pcmpeqb",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x74),                            INS_TT_NONE,                         Input_8Bit     | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed compare 8-bit integers for equality
INST3(pcmpeqd,          "pcmpeqd",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x76),                            INS_TT_NONE,                         Input_32Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed compare 32-bit integers for equality
INST3(pcmpeqw,          "pcmpeqw",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x75),                            INS_TT_NONE,                         Input_16Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed compare 16-bit integers for equality
INST3(pcmpgtb,          "pcmpgtb",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x64),                            INS_TT_NONE,                         Input_8Bit     | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed compare 8-bit signed integers for greater than
INST3(pcmpgtd,          "pcmpgtd",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x66),                            INS_TT_NONE,                         Input_32Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed compare 32-bit signed integers for greater than
INST3(pcmpgtw,          "pcmpgtw",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x65),                            INS_TT_NONE,                         Input_16Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed compare 16-bit signed integers for greater than
INST3(pextrw,           "pextrw",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xC5),                            INS_TT_TUPLE1_SCALAR,                Input_16Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Extract 16-bit value into a r32 with zero extended to 32-bits
INST3(pinsrw,           "pinsrw",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xC4),                            INS_TT_TUPLE1_SCALAR,                Input_16Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Insert word at index
INST3(pmaddwd,          "pmaddwd",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xF5),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst
INST3(pmaxsw,           "pmaxsw",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xEE),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // packed maximum signed words
INST3(pmaxub,           "pmaxub",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xDE),                            INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // packed maximum unsigned bytes
INST3(pminsw,           "pminsw",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xEA),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // packed minimum signed words
INST3(pminub,           "pminub",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xDA),                            INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // packed minimum unsigned bytes
INST3(pmovmskb,         "pmovmskb",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xD7),                            INS_TT_NONE,                                          REX_WIG      | Encoding_VEX)                                                                                                                                                   // Move the MSB bits of all bytes in a xmm reg to an int reg
INST3(pmulhuw,          "pmulhuw",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xE4),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Multiply high the packed 16-bit unsigned integers
INST3(pmulhw,           "pmulhw",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xE5),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Multiply high the packed 16-bit signed integers
INST3(pmullw,           "pmullw",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xD5),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed multiply 16-bit integers and store lower 16 bits of each result
INST3(pmuludq,          "pmuludq",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xF4),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // packed multiply 32-bit unsigned integers and store 64-bit result; EVEX form is a Full tuple with a 64-bit (m64bcst) broadcast element, matching the other W1 broadcast-capable rows
INST3(por,              "por",              IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xEB),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Packed bit-wise OR of two xmm regs
INST3(psadbw,           "psadbw",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xF6),                            INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Compute the sum of absolute differences of packed unsigned 8-bit integers
INST3(pshufd,           "pshufd",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x70),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Packed shuffle of 32-bit integers
// Word shuffles and packed shifts. Per the column legend at the top of this file, 'mi' is the R/M,icon (immediate)
// encoding and 'rm' is the reg,R/M encoding. The byte shifts pslldq/psrldq have BAD_CODE in 'rm', i.e. only an
// immediate form is described for them here.
INST3(pshufhw,          "pshufhw",          IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x70),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.
INST3(pshuflw,          "pshuflw",          IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x70),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.
INST3(pslld,            "pslld",            IUM_WR, BAD_CODE,     PCKDBL(0x72), PCKDBL(0xF2),                            INS_TT_FULL     | INS_TT_MEM128,     Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed shift left logical of 32-bit integers
INST3(pslldq,           "pslldq",           IUM_WR, BAD_CODE,     PCKDBL(0x73), BAD_CODE,                                INS_TT_FULL_MEM,                                      REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Shift left logical of xmm reg by given number of bytes
INST3(psllq,            "psllq",            IUM_WR, BAD_CODE,     PCKDBL(0x73), PCKDBL(0xF3),                            INS_TT_FULL     | INS_TT_MEM128,     Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed shift left logical of 64-bit integers
INST3(psllw,            "psllw",            IUM_WR, BAD_CODE,     PCKDBL(0x71), PCKDBL(0xF1),                            INS_TT_FULL_MEM | INS_TT_MEM128,     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed shift left logical of 16-bit integers
INST3(psrad,            "psrad",            IUM_WR, BAD_CODE,     PCKDBL(0x72), PCKDBL(0xE2),                            INS_TT_FULL     | INS_TT_MEM128,     Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed shift right arithmetic of 32-bit integers
INST3(psraw,            "psraw",            IUM_WR, BAD_CODE,     PCKDBL(0x71), PCKDBL(0xE1),                            INS_TT_FULL_MEM | INS_TT_MEM128,     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed shift right arithmetic of 16-bit integers
INST3(psrld,            "psrld",            IUM_WR, BAD_CODE,     PCKDBL(0x72), PCKDBL(0xD2),                            INS_TT_FULL     | INS_TT_MEM128,     Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed shift right logical of 32-bit integers
INST3(psrldq,           "psrldq",           IUM_WR, BAD_CODE,     PCKDBL(0x73), BAD_CODE,                                INS_TT_FULL_MEM,                                      REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Shift right logical of xmm reg by given number of bytes
INST3(psrlq,            "psrlq",            IUM_WR, BAD_CODE,     PCKDBL(0x73), PCKDBL(0xD3),                            INS_TT_FULL     | INS_TT_MEM128,     Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed shift right logical of 64-bit integers
INST3(psrlw,            "psrlw",            IUM_WR, BAD_CODE,     PCKDBL(0x71), PCKDBL(0xD1),                            INS_TT_FULL_MEM | INS_TT_MEM128,     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed shift right logical of 16-bit integers
INST3(psubb,            "psubb",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xF8),                            INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Subtract packed byte (8-bit) integers
INST3(psubd,            "psubd",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xFA),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Subtract packed double-word (32-bit) integers
INST3(psubq,            "psubq",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xFB),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Subtract packed quad-word (64-bit) integers
INST3(psubw,            "psubw",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xF9),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Subtract packed word (16-bit) integers
INST3(psubsb,           "psubsb",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xE8),                            INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Subtract packed 8-bit integers in b from packed 8-bit integers in a using saturation
INST3(psubsw,           "psubsw",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xE9),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Subtract packed 16-bit integers in b from packed 16-bit integers in a using saturation
INST3(psubusb,          "psubusb",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xD8),                            INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation
INST3(psubusw,          "psubusw",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xD9),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation
INST3(punpckhbw,        "punpckhbw",        IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x68),                            INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed logical (unsigned) widen ubyte to ushort (hi)
INST3(punpckhdq,        "punpckhdq",        IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x6A),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed logical (unsigned) widen uint to ulong (hi)
INST3(punpckhqdq,       "punpckhqdq",       IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x6D),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Unpack and interleave high-order quadwords
INST3(punpckhwd,        "punpckhwd",        IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x69),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed logical (unsigned) widen ushort to uint (hi)
INST3(punpcklbw,        "punpcklbw",        IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x60),                            INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed logical (unsigned) widen ubyte to ushort (lo)
INST3(punpckldq,        "punpckldq",        IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x62),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed logical (unsigned) widen uint to ulong (lo)
INST3(punpcklqdq,       "punpcklqdq",       IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x6C),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Unpack and interleave low-order quadwords
INST3(punpcklwd,        "punpcklwd",        IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x61),                            INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed logical (unsigned) widen ushort to uint (lo)
INST3(pxor,             "pxor",             IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xEF),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Packed bit-wise XOR of two xmm regs
INST3(shufpd,           "shufpd",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xC6),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Shuffle packed doubles based on the encoding in imm8
INST3(sqrtpd,           "sqrtpd",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x51),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Sqrt of packed doubles
INST3(sqrtsd,           "sqrtsd",           IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x51),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Sqrt of scalar double
INST3(subpd,            "subpd",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x5C),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Subtract packed doubles
INST3(subsd,            "subsd",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x5C),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Subtract scalar doubles
INST3(ucomisd,          "ucomisd",          IUM_RD, BAD_CODE,     BAD_CODE,     PCKDBL(0x2E),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX                                        | Resets_OF    | Resets_SF    | Writes_ZF    | Resets_AF    | Writes_PF    | Writes_CF)    // Unordered compare of scalar doubles
INST3(unpckhpd,         "unpckhpd",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x15),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Unpack and interleave high packed doubles
INST3(unpcklpd,         "unpcklpd",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x14),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Unpack and interleave low packed doubles
INST3(xorpd,            "xorpd",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x57),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // XOR packed doubles

// SSE3
// The addsub*/hadd*/hsub*/lddqu rows carry Encoding_VEX but not Encoding_EVEX and use INS_TT_NONE as their tuple type;
// the mov*dup rows carry both Encoding_VEX and Encoding_EVEX.
INST3(addsubpd,         "addsubpd",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xD0),                            INS_TT_NONE,                         Input_64Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Add/Subtract packed doubles
INST3(addsubps,         "addsubps",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0xD0),                            INS_TT_NONE,                         Input_32Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Add/Subtract packed singles
INST3(haddpd,           "haddpd",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x7C),                            INS_TT_NONE,                         Input_64Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Horizontal add packed doubles
INST3(haddps,           "haddps",           IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x7C),                            INS_TT_NONE,                         Input_32Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Horizontal add packed floats
INST3(hsubpd,           "hsubpd",           IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x7D),                            INS_TT_NONE,                         Input_64Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Horizontal subtract packed doubles
INST3(hsubps,           "hsubps",           IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x7D),                            INS_TT_NONE,                         Input_32Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Horizontal subtract packed floats
INST3(lddqu,            "lddqu",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0xF0),                            INS_TT_NONE,                                          REX_WIG      | Encoding_VEX)                                                                                                                                                   // Load unaligned integer data
INST3(movddup,          "movddup",          IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x12),                            INS_TT_MOVDDUP,                      Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Replicate Double FP Values
INST3(movshdup,         "movshdup",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x16),                            INS_TT_FULL_MEM,                     Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Replicate odd-indexed Single FP Values
INST3(movsldup,         "movsldup",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x12),                            INS_TT_FULL_MEM,                     Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Replicate even-indexed Single FP Values

// SSSE3
// The horizontal add/subtract (phadd*/phsub*) and psign* rows carry Encoding_VEX but not Encoding_EVEX and use
// INS_TT_NONE; the remaining rows in this group carry both encodings.
INST3(pabsb,            "pabsb",            IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x1C),                             INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Packed absolute value of bytes
INST3(pabsd,            "pabsd",            IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x1E),                             INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Packed absolute value of 32-bit integers
INST3(pabsw,            "pabsw",            IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x1D),                             INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Packed absolute value of 16-bit integers
INST3(palignr,          "palignr",          IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x0F),                             INS_TT_FULL_MEM,                                      REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed Align Right
INST3(phaddd,           "phaddd",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x02),                             INS_TT_NONE,                         Input_32Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed horizontal add of 32-bit integers
INST3(phaddsw,          "phaddsw",          IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x03),                             INS_TT_NONE,                         Input_16Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed horizontal add of 16-bit integers with saturation
INST3(phaddw,           "phaddw",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x01),                             INS_TT_NONE,                         Input_16Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed horizontal add of 16-bit integers
INST3(phsubd,           "phsubd",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x06),                             INS_TT_NONE,                         Input_32Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed horizontal subtract of 32-bit integers
INST3(phsubsw,          "phsubsw",          IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x07),                             INS_TT_NONE,                         Input_16Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed horizontal subtract of 16-bit integers with saturation
INST3(phsubw,           "phsubw",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x05),                             INS_TT_NONE,                         Input_16Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed horizontal subtract of 16-bit integers
INST3(pmaddubsw,        "pmaddubsw",        IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x04),                             INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Multiply and Add Packed Signed and Unsigned Bytes
INST3(pmulhrsw,         "pmulhrsw",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x0B),                             INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed Multiply High with Round and Scale
INST3(pshufb,           "pshufb",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x00),                             INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed Shuffle Bytes
INST3(psignb,           "psignb",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x08),                             INS_TT_NONE,                         Input_8Bit     | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed SIGN of 8-bit integers
INST3(psignd,           "psignd",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x0A),                             INS_TT_NONE,                         Input_32Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed SIGN of 32-bit integers
INST3(psignw,           "psignw",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x09),                             INS_TT_NONE,                         Input_16Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed SIGN of 16-bit integers

// AESNI & PCLMULQDQ
// Every row in this group carries Encoding_VEX but not Encoding_EVEX, is REX_WIG, and uses INS_TT_NONE as its tuple type.
// aesimc and aeskeygenassist are the only rows here without INS_Flags_IsDstDstSrcAVXInstruction.
INST3(aesdec,           "aesdec",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0xDE),                             INS_TT_NONE,                                          REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Perform one round of an AES decryption flow
INST3(aesdeclast,       "aesdeclast",       IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0xDF),                             INS_TT_NONE,                                          REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Perform last round of an AES decryption flow
INST3(aesenc,           "aesenc",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0xDC),                             INS_TT_NONE,                                          REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Perform one round of an AES encryption flow
INST3(aesenclast,       "aesenclast",       IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0xDD),                             INS_TT_NONE,                                          REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Perform last round of an AES encryption flow
INST3(aesimc,           "aesimc",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0xDB),                             INS_TT_NONE,                                          REX_WIG      | Encoding_VEX)                                                                                                                                                   // Perform the AES InvMixColumn Transformation
INST3(aeskeygenassist,  "aeskeygenassist",  IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0xDF),                             INS_TT_NONE,                                          REX_WIG      | Encoding_VEX)                                                                                                                                                   // AES Round Key Generation Assist
INST3(pclmulqdq,        "pclmulqdq" ,       IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x44),                             INS_TT_NONE,                         Input_64Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Perform a carry-less multiplication of two quadwords

// SSE4.1
INST3(blendpd,          "blendpd",          IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x0D),                             INS_TT_NONE,                         Input_64Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Blend Packed Double Precision Floating-Point Values
INST3(blendps,          "blendps",          IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x0C),                             INS_TT_NONE,                         Input_32Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Blend Packed Single Precision Floating-Point Values
INST3(blendvpd,         "blendvpd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x15),                             INS_TT_NONE,                         Input_64Bit    | REX_W0)                                                                                                                                                                        // Variable Blend Packed Doubles
INST3(blendvps,         "blendvps",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x14),                             INS_TT_NONE,                         Input_32Bit    | REX_W0)                                                                                                                                                                        // Variable Blend Packed Singles
INST3(dppd,             "dppd",             IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x41),                             INS_TT_NONE,                         Input_64Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed dot product of two double vector regs
INST3(dpps,             "dpps",             IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x40),                             INS_TT_NONE,                         Input_32Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed dot product of two float vector regs
INST3(extractps,        "extractps",        IUM_WR, SSE3A(0x17),  BAD_CODE,     BAD_CODE,                                INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Extract Packed Floating-Point Values
INST3(insertps,         "insertps",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x21),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Insert packed single precision float value
INST3(movntdqa,         "movntdqa",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x2A),                             INS_TT_FULL_MEM,                                      REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Load Double Quadword Non-Temporal Aligned Hint
INST3(mpsadbw,          "mpsadbw",          IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x42),                             INS_TT_NONE,                         Input_8Bit     | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Compute Multiple Packed Sums of Absolute Difference
INST3(packusdw,         "packusdw",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x2B),                             INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Pack (narrow) int to unsigned short with saturation
INST3(pblendvb,         "pblendvb",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x10),                             INS_TT_NONE,                         Input_8Bit     | REX_W0)                                                                                                                                                                        // Variable Blend Packed Bytes
INST3(pblendw,          "pblendw",          IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x0E),                             INS_TT_NONE,                         Input_16Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Blend Packed Words
INST3(pcmpeqq,          "pcmpeqq",          IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x29),                             INS_TT_NONE,                         Input_64Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed compare 64-bit integers for equality
INST3(pextrb,           "pextrb",           IUM_WR, SSE3A(0x14),  BAD_CODE,     BAD_CODE,                                INS_TT_TUPLE1_SCALAR,                Input_8Bit     | REX_W0       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Extract Byte
INST3(pextrd,           "pextrd",           IUM_WR, SSE3A(0x16),  BAD_CODE,     BAD_CODE,                                INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Extract Dword
INST3(pextrq,           "pextrq",           IUM_WR, SSE3A(0x16),  BAD_CODE,     BAD_CODE,                                INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Extract Qword
INST3(pextrw_sse41,     "pextrw",           IUM_WR, SSE3A(0x15),  BAD_CODE,     BAD_CODE,                                INS_TT_TUPLE1_SCALAR,                Input_16Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Extract Word
INST3(phminposuw,       "phminposuw",       IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x41),                             INS_TT_NONE,                         Input_16Bit    | REX_WIG      | Encoding_VEX)                                                                                                                                                   // Packed Horizontal Word Minimum
INST3(pinsrb,           "pinsrb",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x20),                             INS_TT_TUPLE1_SCALAR,                Input_8Bit     | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Insert Byte
INST3(pinsrd,           "pinsrd",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x22),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Insert Dword
INST3(pinsrq,           "pinsrq",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x22),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Insert Qword
INST3(pmaxsb,           "pmaxsb",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x3C),                             INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // packed maximum signed bytes
INST3(pmaxsd,           "pmaxsd",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x3D),                             INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // packed maximum 32-bit signed integers
INST3(pmaxud,           "pmaxud",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x3F),                             INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // packed maximum 32-bit unsigned integers
INST3(pmaxuw,           "pmaxuw",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x3E),                             INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // packed maximum 16-bit unsigned integers
INST3(pminsb,           "pminsb",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x38),                             INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // packed minimum signed bytes
INST3(pminsd,           "pminsd",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x39),                             INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // packed minimum 32-bit signed integers
INST3(pminud,           "pminud",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x3B),                             INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // packed minimum 32-bit unsigned integers
INST3(pminuw,           "pminuw",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x3A),                             INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // packed minimum 16-bit unsigned integers
INST3(pmovsxbd,         "pmovsxbd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x21),                             INS_TT_QUARTER_MEM,                  Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Packed sign extend byte to int
INST3(pmovsxbq,         "pmovsxbq",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x22),                             INS_TT_EIGHTH_MEM,                   Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Packed sign extend byte to long
INST3(pmovsxbw,         "pmovsxbw",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x20),                             INS_TT_HALF_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Packed sign extend byte to short
INST3(pmovsxdq,         "pmovsxdq",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x25),                             INS_TT_HALF_MEM,                     Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Packed sign extend int to long
INST3(pmovsxwd,         "pmovsxwd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x23),                             INS_TT_HALF_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Packed sign extend short to int
INST3(pmovsxwq,         "pmovsxwq",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x24),                             INS_TT_QUARTER_MEM,                  Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Packed sign extend short to long
INST3(pmovzxbd,         "pmovzxbd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x31),                             INS_TT_QUARTER_MEM,                  Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Packed zero extend byte to int
INST3(pmovzxbq,         "pmovzxbq",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x32),                             INS_TT_EIGHTH_MEM,                   Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Packed zero extend byte to long
INST3(pmovzxbw,         "pmovzxbw",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x30),                             INS_TT_HALF_MEM,                     Input_8Bit     | REX_WIG      | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Packed zero extend byte to short
INST3(pmovzxdq,         "pmovzxdq",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x35),                             INS_TT_HALF_MEM,                     Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Packed zero extend int to long
INST3(pmovzxwd,         "pmovzxwd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x33),                             INS_TT_HALF_MEM,                     Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Packed zero extend short to int
INST3(pmovzxwq,         "pmovzxwq",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x34),                             INS_TT_QUARTER_MEM,                  Input_16Bit    | REX_WIG      | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Packed zero extend short to long
INST3(pmuldq,           "pmuldq",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x28),                             INS_TT_FULL,                         Input_32Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // packed multiply 32-bit signed integers and store 64-bit result
INST3(pmulld,           "pmulld",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x40),                             INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Packed multiply 32-bit signed integers and store lower 32 bits of each result
INST3(ptest,            "ptest",            IUM_RD, BAD_CODE,     BAD_CODE,     SSE38(0x17),                             INS_TT_NONE,                                          REX_WIG      | Encoding_VEX                                                         | Resets_OF    | Resets_SF    | Writes_ZF    | Resets_AF    | Resets_PF    | Writes_CF)    // Packed logical compare
INST3(roundpd,          "roundpd",          IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x09),                             INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Round packed double precision floating-point values
INST3(roundps,          "roundps",          IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x08),                             INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Round packed single precision floating-point values
INST3(roundsd,          "roundsd",          IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x0B),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Round scalar double precision floating-point values
INST3(roundss,          "roundss",          IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x0A),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Round scalar single precision floating-point values

// SSE4.2
INST3(pcmpgtq,          "pcmpgtq",          IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x37),                             INS_TT_NONE,                         Input_64Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed compare 64-bit signed integers for greater than

// Marker entry, not an encodable instruction: every encoding slot is BAD_CODE and no flags are set.
// NOTE(review): presumably the upper bound of the SSE id range for range checks on the instruction enum - confirm against users of this id.
INST3(LAST_SSE_INSTRUCTION, "LAST_SSE_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)

// Marker entry, not an encodable instruction: every encoding slot is BAD_CODE and no flags are set.
// NOTE(review): presumably the lower bound of the AVX id range; the position of this row in the table matters - confirm against users of this id.
INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)

// AVX
INST3(vblendvpd,        "blendvpd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x4B),                             INS_TT_NONE,                         Input_64Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Variable Blend Packed Doubles
INST3(vblendvps,        "blendvps",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x4A),                             INS_TT_NONE,                         Input_32Bit    | REX_WIG      | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Variable Blend Packed Singles
INST3(vbroadcastf128,   "broadcastf128",    IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x1A),                             INS_TT_TUPLE4,                       Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Broadcast packed float values read from memory to entire ymm register
INST3(vbroadcastsd,     "broadcastsd",      IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x19),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_WX       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Broadcast double value read from memory to entire ymm register
INST3(vbroadcastss,     "broadcastss",      IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x18),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Broadcast float value read from memory to entire ymm register
INST3(vextractf128,     "extractf128",      IUM_WR, SSE3A(0x19),  BAD_CODE,     BAD_CODE,                                INS_TT_TUPLE4,                       Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Extract 128-bit packed floating point values
INST3(vinsertf128,      "insertf128",       IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x18),                             INS_TT_TUPLE4,                       Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Insert 128-bit packed floating point values
INST3(vmaskmovpd,       "maskmovpd",        IUM_WR, SSE38(0x2F),  BAD_CODE,     SSE38(0x2D),                             INS_TT_NONE,                         Input_64Bit    | REX_W0       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores
INST3(vmaskmovps,       "maskmovps",        IUM_WR, SSE38(0x2E),  BAD_CODE,     SSE38(0x2C),                             INS_TT_NONE,                         Input_32Bit    | REX_W0       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores
INST3(vpblendvb,        "pblendvb",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x4C),                             INS_TT_NONE,                         Input_8Bit     | REX_W0       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Variable Blend Packed Bytes
INST3(vperm2f128,       "perm2f128",        IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x06),                             INS_TT_NONE,                                          REX_W0       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Permute Floating-Point Values
INST3(vpermilpd,        "permilpd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x05),                             INS_TT_FULL,                         Input_64Bit    | REX_WX       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Permute In-Lane of Pairs of Double-Precision Floating-Point Values
INST3(vpermilpdvar,     "permilpd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x0D),                             INS_TT_FULL,                         Input_64Bit    | REX_WX       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Permute In-Lane of Pairs of Double-Precision Floating-Point Values
INST3(vpermilps,        "permilps",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x04),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
INST3(vpermilpsvar,     "permilps",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x0C),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
INST3(vtestpd,          "testpd",           IUM_RD, BAD_CODE,     BAD_CODE,     SSE38(0x0F),                             INS_TT_NONE,                         Input_64Bit    | REX_W0       | Encoding_VEX                                                         | Resets_OF    | Resets_SF    | Writes_ZF    | Resets_AF    | Resets_PF    | Writes_CF)    // Packed Bit Test
INST3(vtestps,          "testps",           IUM_RD, BAD_CODE,     BAD_CODE,     SSE38(0x0E),                             INS_TT_NONE,                         Input_32Bit    | REX_W0       | Encoding_VEX                                                         | Resets_OF    | Resets_SF    | Writes_ZF    | Resets_AF    | Resets_PF    | Writes_CF)    // Packed Bit Test
INST3(vzeroupper,       "zeroupper",        IUM_WR, 0xC577F8,     BAD_CODE,     BAD_CODE,                                INS_TT_NONE,                                          REX_WIG      | Encoding_VEX)                                                                                                                                                   // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix)

// AVX2
// Rows without Encoding_EVEX in their flags (the vgather* rows, vpblendd, vperm2i128) are described here with a VEX encoding only.
INST3(vbroadcasti128,   "broadcasti128",    IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x5A),                             INS_TT_TUPLE4,                       Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Broadcast packed integer values read from memory to entire ymm register
INST3(vextracti128,     "extracti128",      IUM_WR, SSE3A(0x39),  BAD_CODE,     BAD_CODE,                                INS_TT_TUPLE4,                       Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Extract 128-bit packed integer values (only the mr encoding is defined)
INST3(vgatherdpd,       "gatherdpd",        IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x92),                             INS_TT_NONE,                         Input_64Bit    | REX_W1       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Gather Packed DP FP Values Using Signed Dword Indices
INST3(vgatherdps,       "gatherdps",        IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x92),                             INS_TT_NONE,                         Input_32Bit    | REX_W0       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Gather Packed SP FP values Using Signed Dword Indices
INST3(vgatherqpd,       "gatherqpd",        IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x93),                             INS_TT_NONE,                         Input_64Bit    | REX_W1       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Gather Packed DP FP Values Using Signed Qword Indices
INST3(vgatherqps,       "gatherqps",        IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x93),                             INS_TT_NONE,                         Input_32Bit    | REX_W0       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Gather Packed SP FP values Using Signed Qword Indices
INST3(vinserti128,      "inserti128",       IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x38),                             INS_TT_TUPLE4,                       Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Insert 128-bit packed integer values
INST3(vpblendd,         "pblendd",          IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x02),                             INS_TT_NONE,                         Input_32Bit    | REX_W0       | Encoding_VEX  |                  INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Blend Packed DWORDs
INST3(vpbroadcastb,     "pbroadcastb",      IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x78),                             INS_TT_TUPLE1_SCALAR,                Input_8Bit     | REX_W0       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Broadcast int8 value from reg/memory to entire ymm register
INST3(vpbroadcastd,     "pbroadcastd",      IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x58),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Broadcast int32 value from reg/memory to entire ymm register
INST3(vpbroadcastq,     "pbroadcastq",      IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x59),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_WX       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Broadcast int64 value from reg/memory to entire ymm register (only row in this group flagged REX_WX)
INST3(vpbroadcastw,     "pbroadcastw",      IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x79),                             INS_TT_TUPLE1_SCALAR,                Input_16Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Broadcast int16 value from reg/memory to entire ymm register
INST3(vperm2i128,       "perm2i128",        IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x46),                             INS_TT_NONE,                                          REX_W0       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Permute 128-bit halves of input register (no Input_* size flag is specified for this row)
INST3(vpermd,           "permd",            IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x36),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Permute Packed Doubleword Elements (32-bit elements, W0; integer counterpart of vpermps)
INST3(vpermpd,          "permpd",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x01),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Permute Double-Precision Floating-Point Values
INST3(vpermps,          "permps",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x16),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Permute Single-Precision Floating-Point Elements
INST3(vpermq,           "permq",            IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x00),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // Permute 64-bit (Qword) Elements of input register
INST3(vpgatherdd,       "pgatherdd",        IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x90),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Gather Packed Dword Values Using Signed Dword Indices
INST3(vpgatherdq,       "pgatherdq",        IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x90),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Gather Packed Qword Values Using Signed Dword Indices
INST3(vpgatherqd,       "pgatherqd",        IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x91),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Gather Packed Dword Values Using Signed Qword Indices
INST3(vpgatherqq,       "pgatherqq",        IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x91),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Gather Packed Qword Values Using Signed Qword Indices
INST3(vpmaskmovd,       "pmaskmovd",        IUM_WR, SSE38(0x8E),  BAD_CODE,     SSE38(0x8C),                             INS_TT_NONE,                         Input_32Bit    | REX_W0       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Conditional SIMD Integer Packed Dword Loads and Stores (mr and rm encodings are both defined)
INST3(vpmaskmovq,       "pmaskmovq",        IUM_WR, SSE38(0x8E),  BAD_CODE,     SSE38(0x8C),                             INS_TT_NONE,                         Input_64Bit    | REX_W1       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Conditional SIMD Integer Packed Qword Loads and Stores (mr and rm encodings are both defined)
INST3(vpsllvd,          "psllvd",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x47),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Variable Bit Shift Left Logical
INST3(vpsllvq,          "psllvq",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x47),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Variable Bit Shift Left Logical
INST3(vpsravd,          "psravd",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x46),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Variable Bit Shift Right Arithmetic
INST3(vpsrlvd,          "psrlvd",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x45),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Variable Bit Shift Right Logical
INST3(vpsrlvq,          "psrlvq",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x45),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Variable Bit Shift Right Logical

// FMA instructions.
// FIRST_FMA_INSTRUCTION and LAST_FMA_INSTRUCTION bracket the group; both have BAD_CODE in every encoding slot
// (presumably they exist only so the FMA ids form a contiguous, range-checkable span; verify against users of the enum).
// Each operation comes in three variants named 132, 213 and 231 (the operand orderings defined by the Intel SDM);
// within a family the three variants differ only in the high nibble of the opcode (0x9_, 0xA_, 0xB_).
// pd/sd rows use Input_64Bit | REX_W1, ps/ss rows use Input_32Bit | REX_W0; scalar rows use INS_TT_TUPLE1_SCALAR.
// A row whose trailing comment is empty shares the description of the nearest commented row above it.
INST3(FIRST_FMA_INSTRUCTION, "FIRST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
//    id                nm                  um      mr            mi            rm                                       tt                                   flags
INST3(vfmadd132pd,      "fmadd132pd",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x98),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Multiply-Add of Packed Double-Precision Floating-Point Values
INST3(vfmadd213pd,      "fmadd213pd",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xA8),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmadd231pd,      "fmadd231pd",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xB8),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmadd132ps,      "fmadd132ps",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x98),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Multiply-Add of Packed Single-Precision Floating-Point Values
INST3(vfmadd213ps,      "fmadd213ps",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xA8),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmadd231ps,      "fmadd231ps",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xB8),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmadd132sd,      "fmadd132sd",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x99),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Multiply-Add of Scalar Double-Precision Floating-Point Values
INST3(vfmadd213sd,      "fmadd213sd",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xA9),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmadd231sd,      "fmadd231sd",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xB9),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmadd132ss,      "fmadd132ss",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x99),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Multiply-Add of Scalar Single-Precision Floating-Point Values
INST3(vfmadd213ss,      "fmadd213ss",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xA9),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmadd231ss,      "fmadd231ss",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xB9),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmaddsub132pd,   "fmaddsub132pd",    IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x96),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values
INST3(vfmaddsub213pd,   "fmaddsub213pd",    IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xA6),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmaddsub231pd,   "fmaddsub231pd",    IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xB6),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmaddsub132ps,   "fmaddsub132ps",    IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x96),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values
INST3(vfmaddsub213ps,   "fmaddsub213ps",    IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xA6),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmaddsub231ps,   "fmaddsub231ps",    IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xB6),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmsubadd132pd,   "fmsubadd132pd",    IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x97),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values
INST3(vfmsubadd213pd,   "fmsubadd213pd",    IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xA7),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmsubadd231pd,   "fmsubadd231pd",    IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xB7),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmsubadd132ps,   "fmsubadd132ps",    IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x97),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values
INST3(vfmsubadd213ps,   "fmsubadd213ps",    IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xA7),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmsubadd231ps,   "fmsubadd231ps",    IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xB7),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmsub132pd,      "fmsub132pd",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x9A),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values
INST3(vfmsub213pd,      "fmsub213pd",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xAA),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmsub231pd,      "fmsub231pd",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xBA),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmsub132ps,      "fmsub132ps",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x9A),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values
INST3(vfmsub213ps,      "fmsub213ps",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xAA),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmsub231ps,      "fmsub231ps",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xBA),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmsub132sd,      "fmsub132sd",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x9B),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values
INST3(vfmsub213sd,      "fmsub213sd",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xAB),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmsub231sd,      "fmsub231sd",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xBB),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmsub132ss,      "fmsub132ss",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x9B),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values
INST3(vfmsub213ss,      "fmsub213ss",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xAB),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfmsub231ss,      "fmsub231ss",       IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xBB),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfnmadd132pd,     "fnmadd132pd",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x9C),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values
INST3(vfnmadd213pd,     "fnmadd213pd",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xAC),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfnmadd231pd,     "fnmadd231pd",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xBC),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfnmadd132ps,     "fnmadd132ps",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x9C),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values
INST3(vfnmadd213ps,     "fnmadd213ps",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xAC),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfnmadd231ps,     "fnmadd231ps",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xBC),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfnmadd132sd,     "fnmadd132sd",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x9D),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values
INST3(vfnmadd213sd,     "fnmadd213sd",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xAD),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfnmadd231sd,     "fnmadd231sd",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xBD),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfnmadd132ss,     "fnmadd132ss",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x9D),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values
INST3(vfnmadd213ss,     "fnmadd213ss",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xAD),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfnmadd231ss,     "fnmadd231ss",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xBD),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfnmsub132pd,     "fnmsub132pd",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x9E),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values
INST3(vfnmsub213pd,     "fnmsub213pd",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xAE),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfnmsub231pd,     "fnmsub231pd",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xBE),                             INS_TT_FULL,                         Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfnmsub132ps,     "fnmsub132ps",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x9E),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values
INST3(vfnmsub213ps,     "fnmsub213ps",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xAE),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfnmsub231ps,     "fnmsub231ps",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xBE),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfnmsub132sd,     "fnmsub132sd",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x9F),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values
INST3(vfnmsub213sd,     "fnmsub213sd",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xAF),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfnmsub231sd,     "fnmsub231sd",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xBF),                             INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfnmsub132ss,     "fnmsub132ss",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x9F),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values
INST3(vfnmsub213ss,     "fnmsub213ss",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xAF),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(vfnmsub231ss,     "fnmsub231ss",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xBF),                             INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //
INST3(LAST_FMA_INSTRUCTION, "LAST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)

// AVX-VNNI group. The FIRST_/LAST_ rows are marker entries with no encodings (all BAD_CODE);
// the four real rows between them are listed with Encoding_VEX only (no Encoding_EVEX).
INST3(FIRST_AVXVNNI_INSTRUCTION, "FIRST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
INST3(vpdpbusd,          "pdpbusd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x50),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Multiply and Add Unsigned and Signed Bytes
INST3(vpdpwssd,          "pdpwssd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x52),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Multiply and Add Signed Word Integers
INST3(vpdpbusds,         "pdpbusds",        IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x51),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Multiply and Add Unsigned and Signed Bytes with Saturation
INST3(vpdpwssds,         "pdpwssds",        IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x53),                             INS_TT_FULL,                         Input_32Bit    | REX_W0       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Multiply and Add Signed Word Integers with Saturation
INST3(LAST_AVXVNNI_INSTRUCTION, "LAST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)


// BMI group. The FIRST_/LAST_ rows are marker entries with no encodings (all BAD_CODE).
INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)

// BMI1
INST3(andn,             "andn",             IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0xF2),                             INS_TT_NONE,                                          REX_WX       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF    | Writes_SF    | Writes_ZF    | Undefined_AF | Undefined_PF | Resets_CF)    // Logical AND NOT
INST3(bextr,            "bextr",            IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0xF7),                             INS_TT_NONE,                                          REX_WX       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF    | Undefined_SF | Writes_ZF    | Undefined_AF | Undefined_PF | Resets_CF)    // Bit Field Extract
INST3(blsi,             "blsi",             IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0xF3),                             INS_TT_NONE,                                          REX_WX       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF    | Writes_SF    | Writes_ZF    | Undefined_AF | Undefined_PF | Writes_CF)    // Extract Lowest Set Isolated Bit
INST3(blsmsk,           "blsmsk",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0xF3),                             INS_TT_NONE,                                          REX_WX       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF    | Writes_SF    | Resets_ZF    | Undefined_AF | Undefined_PF | Writes_CF)    // Get Mask Up to Lowest Set Bit
INST3(blsr,             "blsr",             IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0xF3),                             INS_TT_NONE,                                          REX_WX       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF    | Writes_SF    | Writes_ZF    | Undefined_AF | Undefined_PF | Writes_CF)    // Reset Lowest Set Bit

// BMI2
INST3(bzhi,             "bzhi",             IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0xF5),                             INS_TT_NONE,                                          REX_WX       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF    | Writes_SF    | Writes_ZF    | Undefined_AF | Undefined_PF | Writes_CF)    // Zero High Bits Starting with Specified Bit Position
INST3(mulx,             "mulx",             IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0xF6),                             INS_TT_NONE,                                          REX_WX       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Unsigned Multiply Without Affecting Flags
INST3(pdep,             "pdep",             IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0xF5),                             INS_TT_NONE,                                          REX_WX       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Parallel Bits Deposit
INST3(pext,             "pext",             IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0xF5),                             INS_TT_NONE,                                          REX_WX       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Parallel Bits Extract
// rorx is the only BMI2 row here without INS_Flags_IsDstDstSrcAVXInstruction and the only one in the SSE3A opcode map.
INST3(rorx,             "rorx",             IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0xF0),                             INS_TT_NONE,                                          REX_WX       | Encoding_VEX)                                                                                                                                  // Rotate Right Logical Without Affecting Flags
// sarx/shlx/shrx are only present in the table when building for AMD64 targets.
#ifdef TARGET_AMD64
INST3(sarx,             "sarx",             IUM_WR, BAD_CODE,     BAD_CODE,     PSSE38(0xF3, 0xF7),                      INS_TT_NONE,                                          REX_WX       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //  Shift Arithmetic Right Without Affecting Flags
INST3(shlx,             "shlx",             IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0xF7),                             INS_TT_NONE,                                          REX_WX       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //  Shift Logical Left Without Affecting Flags
INST3(shrx,             "shrx",             IUM_WR, BAD_CODE,     BAD_CODE,     PSSE38(0xF2, 0xF7),                      INS_TT_NONE,                                          REX_WX       | Encoding_VEX                   | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           //  Shift Logical Right Without Affecting Flags
#endif // TARGET_AMD64

INST3(LAST_BMI_INSTRUCTION, "LAST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)

// Marker entries (no encodings, all BAD_CODE): the first closes the AVX range that precedes it,
// the second opens the AVX-512 range that follows.
INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)

INST3(FIRST_AVX512_INSTRUCTION, "FIRST_AVX512_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)

// AVX512F
// Opmask (k-register) instructions: all carry the KInstruction flag and are VEX-encoded.
// kmovw appears twice under the same display name; the _msk row is the only one here that also has an mr encoding.
INST3(kandw,            "kandw",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x41),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical AND masks
INST3(kandnw,           "kandnw",           IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x42),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical AND NOT masks
INST3(kmovw_gpr,        "kmovw",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x92),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Move from and to mask registers
INST3(kmovw_msk,        "kmovw",            IUM_WR, PCKFLT(0x91),           BAD_CODE,     PCKFLT(0x90),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Move from and to mask registers
INST3(knotw,            "knotw",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x44),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // NOT mask register
INST3(korw,             "korw",             IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x45),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical OR masks
INST3(kortestw,         "kortestw",         IUM_RD, BAD_CODE,               BAD_CODE,     PCKFLT(0x98),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX | Resets_OF      | Resets_SF     | Writes_ZF     | Resets_AF     | Resets_PF     | Writes_CF      | KInstruction)                  // OR masks and set flags
INST3(kshiftlw,         "kshiftlw",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x32),                   INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Shift left mask registers
INST3(kshiftrw,         "kshiftrw",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x30),                   INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Shift right mask registers
// NOTE(review): kunpckbw is marked IUM_RD, while the other mask ops here that produce a mask result use IUM_WR — confirm this is intended.
INST3(kunpckbw,         "kunpckbw",         IUM_RD, BAD_CODE,               BAD_CODE,     PCKDBL(0x4B),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Unpack for mask registers
INST3(kxnorw,           "kxnorw",           IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x46),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical XNOR masks
INST3(kxorw,            "kxorw",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x47),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical XOR masks
INST3(valignd,          "alignd",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x03),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Align doubleword vectors
INST3(valignq,          "alignq",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x03),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Align quadword vectors
INST3(vblendmpd,        "blendmpd",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x65),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Blend Float64 vectors using an OpMask control
INST3(vblendmps,        "blendmps",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x65),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Blend Float32 vectors using an OpMask control
INST3(vpblendmq,        "pblendmq",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x64),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Blend Int64 vectors using an OpMask control
INST3(vpblendmb,        "pblendmb",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x66),                   INS_TT_FULL_MEM,                     Input_8Bit     | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Blend Int8 (byte) vectors using an OpMask control
// Tuple types follow the Intel SDM operand-encoding tables: the 64x2 forms read an m128 source (Tuple2),
// while the 64x4 forms read an m256 source (Tuple4). The tuple type determines the EVEX disp8*N scaling
// factor, so the 64x4 rows must be Tuple4, matching the vextract*64x4 / vinsert*64x4 rows in this table.
INST3(vbroadcastf64x2,  "broadcastf64x2",   IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x1A),                   INS_TT_TUPLE2,                       Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Broadcast packed float values read from memory to entire register
INST3(vbroadcasti64x2,  "broadcasti64x2",   IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x5A),                   INS_TT_TUPLE2,                       Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Broadcast packed integer values read from memory to entire register
INST3(vbroadcastf64x4,  "broadcastf64x4",   IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x1B),                   INS_TT_TUPLE4,                       Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Broadcast packed float values read from memory to entire register
INST3(vbroadcasti64x4,  "broadcasti64x4",   IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x5B),                   INS_TT_TUPLE4,                       Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Broadcast packed integer values read from memory to entire register
// EVEX-only compare and unsigned-conversion rows. Where a mnemonic is split into a 32/64 pair,
// the two rows share the display name and opcode and differ in the REX_W0 / REX_W1 flag.
INST3(vcmpps,           "cmpps",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0xC2),                  INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                          // compare packed singles
INST3(vcmpss,           "cmpss",            IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0xC2),                  INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                          // compare scalar singles
INST3(vcmppd,           "cmppd",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0xC2),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                          // compare packed doubles
INST3(vcmpsd,           "cmpsd",            IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0xC2),                  INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                          // compare scalar doubles
INST3(vcvtpd2udq,       "cvtpd2udq",        IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x79),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt packed doubles to unsigned DWORDs
INST3(vcvtps2udq,       "cvtps2udq",        IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x79),                  INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt packed singles to unsigned DWORDs
INST3(vcvtsd2usi,       "cvtsd2usi",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x79),                  INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_WX                       | Encoding_EVEX)                                                                                                                                  // cvt scalar double to unsigned DWORD/QWORD
INST3(vcvtss2usi,       "cvtss2usi",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x79),                  INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_WX                       | Encoding_EVEX)                                                                                                                                  // cvt scalar single to unsigned DWORD/QWORD
INST3(vcvttpd2udq,      "cvttpd2udq",       IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x78),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation packed doubles to unsigned DWORDs
INST3(vcvttps2udq,      "cvttps2udq",       IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x78),                  INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation packed singles to unsigned DWORDs
INST3(vcvttsd2usi,      "cvttsd2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_WX                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar double to unsigned DWORD/QWORD
INST3(vcvttss2usi32,    "cvttss2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar single to unsigned DWORD
INST3(vcvttss2usi64,    "cvttss2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar single to unsigned QWORD
INST3(vcvtudq2pd,       "cvtudq2pd",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x7A),                  INS_TT_HALF,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt packed unsigned DWORDs to doubles
INST3(vcvtudq2ps,       "cvtudq2ps",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x7A),                  INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt packed unsigned DWORDs to singles
INST3(vcvtusi2sd32,     "cvtusi2sd",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x7B),                  INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // cvt scalar unsigned DWORD to double
INST3(vcvtusi2sd64,     "cvtusi2sd",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x7B),                  INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // cvt scalar unsigned QWORD to double
INST3(vcvtusi2ss32,     "cvtusi2ss",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x7B),                  INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // cvt scalar unsigned DWORD to single
INST3(vcvtusi2ss64,     "cvtusi2ss",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x7B),                  INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // cvt scalar unsigned QWORD to single
// EVEX-only rows. The vextract*64x4 rows are the only ones in this run whose encoding sits in the
// mr column (rm is BAD_CODE); every other row here is encoded through the rm column.
INST3(vextractf64x4,    "extractf64x4",     IUM_WR, SSE3A(0x1B),            BAD_CODE,     BAD_CODE,                      INS_TT_TUPLE4,                       Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Extract 256-bit packed double-precision floating point values
INST3(vextracti64x4,    "extracti64x4",     IUM_WR, SSE3A(0x3B),            BAD_CODE,     BAD_CODE,                      INS_TT_TUPLE4,                       Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Extract 256-bit packed quadword integer values
INST3(vfixupimmpd,      "fixupimmpd",       IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x54),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fixup special packed double-precision floating-point values
INST3(vfixupimmps,      "fixupimmps",       IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x54),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fixup special packed single-precision floating-point values
INST3(vfixupimmsd,      "fixupimmsd",       IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x55),                   INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fixup special scalar double-precision floating-point value
INST3(vfixupimmss,      "fixupimmss",       IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x55),                   INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Fixup special scalar single-precision floating-point value
INST3(vgetexppd,        "getexppd",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x42),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Extract exponents of packed double-precision floating-point values
INST3(vgetexpps,        "getexpps",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x42),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Extract exponents of packed single-precision floating-point values
INST3(vgetexpsd,        "getexpsd",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x43),                   INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Extract exponents of scalar double-precision floating-point value
INST3(vgetexpss,        "getexpss",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x43),                   INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Extract exponents of scalar single-precision floating-point value
INST3(vgetmantpd,       "getmantpd",        IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x26),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Extract mantissas of packed double-precision floating-point values
INST3(vgetmantps,       "getmantps",        IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x26),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Extract mantissas of packed single-precision floating-point values
INST3(vgetmantsd,       "getmantsd",        IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x27),                   INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Extract mantissas of scalar double-precision floating-point value
INST3(vgetmantss,       "getmantss",        IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x27),                   INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Extract mantissas of scalar single-precision floating-point value
INST3(vinsertf64x4,     "insertf64x4",      IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x1A),                   INS_TT_TUPLE4,                       Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Insert 256-bit packed double-precision floating point values
INST3(vinserti64x4,     "inserti64x4",      IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x3A),                   INS_TT_TUPLE4,                       Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Insert 256-bit packed quadword integer values
INST3(vmovdqa64,        "movdqa64",         IUM_WR, PCKDBL(0x7F),           BAD_CODE,     PCKDBL(0x6F),                  INS_TT_FULL_MEM,                     Input_64Bit    | REX_W1                       | Encoding_EVEX)
INST3(vmovdqu64,        "movdqu64",         IUM_WR, SSEFLT(0x7F),           BAD_CODE,     SSEFLT(0x6F),                  INS_TT_FULL_MEM,                     Input_64Bit    | REX_W1                       | Encoding_EVEX)
INST3(vpabsq,           "pabsq",            IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x1F),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Packed absolute value of 64-bit integers
INST3(vpandq,           "pandq",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0xDB),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Packed bit-wise AND of two xmm regs
INST3(vpandnq,          "pandnq",           IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0xDF),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Packed bit-wise AND NOT of two xmm regs
INST3(vpbroadcastd_gpr, "pbroadcastd",      IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x7C),                   INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Broadcast int32 value from gpr to entire register
INST3(vpbroadcastq_gpr, "pbroadcastq",      IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x7C),                   INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Broadcast int64 value from gpr to entire register
INST3(vpcmpeqd,         "pcmpeqd",          IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x76),                  INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed compare 32-bit integers for equality
INST3(vpcmpgtd,         "pcmpgtd",          IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x66),                  INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed compare 32-bit signed integers for greater than
INST3(vpcmpeqq,         "pcmpeqq",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x29),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed compare 64-bit integers for equality
INST3(vpcmpgtq,         "pcmpgtq",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x37),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed compare 64-bit signed integers for greater than
INST3(vpermq_reg,       "permq",            IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x36),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Permute 64-bit integers of input using indices held in a register
INST3(vpermpd_reg,      "permpd",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x16),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Permute double-precision floating-point values of input using indices held in a register
INST3(vpermi2d,         "permi2d",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x76),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Full Permute of 32-bit integers From Two Tables Overwriting the Index
INST3(vpermi2pd,        "permi2pd",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x77),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Full Permute of double-precision floating-point values From Two Tables Overwriting the Index
INST3(vpermi2ps,        "permi2ps",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x77),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Full Permute of single-precision floating-point values From Two Tables Overwriting the Index
INST3(vpermi2q,         "permi2q",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x76),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Full Permute of 64-bit integers From Two Tables Overwriting the Index
INST3(vpermt2d,         "permt2d",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x7E),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Full Permute of 32-bit integers From Two Tables Overwriting one Table
INST3(vpermt2pd,        "permt2pd",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x7F),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Full Permute of double-precision floating-point values From Two Tables Overwriting one Table
INST3(vpermt2ps,        "permt2ps",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x7F),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Full Permute of single-precision floating-point values From Two Tables Overwriting one Table
INST3(vpermt2q,         "permt2q",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x7E),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Full Permute of 64-bit integers From Two Tables Overwriting one Table
INST3(vpmaxsq,          "pmaxsq",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x3D),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Packed maximum 64-bit signed integers
INST3(vpmaxuq,          "pmaxuq",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x3F),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Packed maximum 64-bit unsigned integers
INST3(vpminsq,          "pminsq",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x39),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Packed minimum 64-bit signed integers
INST3(vpminuq,          "pminuq",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x3B),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Packed minimum 64-bit unsigned integers
INST3(vpmovdb,          "pmovdb",           IUM_WR, PSSE38(0xF3, 0x31),     BAD_CODE,     PSSE38(0xF3, 0x31),            INS_TT_QUARTER_MEM,                  Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert packed 32-bit integers to 8-bit integers with truncation
INST3(vpmovdw,          "pmovdw",           IUM_WR, PSSE38(0xF3, 0x33),     BAD_CODE,     PSSE38(0xF3, 0x33),            INS_TT_HALF_MEM,                     Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert packed 32-bit integers to 16-bit integers with truncation
INST3(vpmovqb,          "pmovqb",           IUM_WR, PSSE38(0xF3, 0x32),     BAD_CODE,     PSSE38(0xF3, 0x32),            INS_TT_EIGHTH_MEM,                   Input_64Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert packed 64-bit integers to 8-bit integers with truncation
INST3(vpmovqd,          "pmovqd",           IUM_WR, PSSE38(0xF3, 0x35),     BAD_CODE,     PSSE38(0xF3, 0x35),            INS_TT_HALF_MEM,                     Input_64Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert packed 64-bit integers to 32-bit integers with truncation
INST3(vpmovqw,          "pmovqw",           IUM_WR, PSSE38(0xF3, 0x34),     BAD_CODE,     PSSE38(0xF3, 0x34),            INS_TT_QUARTER_MEM,                  Input_64Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert packed 64-bit integers to 16-bit integers with truncation
INST3(vpmovsdb,         "pmovsdb",          IUM_WR, PSSE38(0xF3, 0x21),     BAD_CODE,     PSSE38(0xF3, 0x21),            INS_TT_QUARTER_MEM,                  Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert packed 32-bit integers to 8-bit integers with signed saturation
INST3(vpmovsdw,         "pmovsdw",          IUM_WR, PSSE38(0xF3, 0x23),     BAD_CODE,     PSSE38(0xF3, 0x23),            INS_TT_HALF_MEM,                     Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert packed 32-bit integers to 16-bit integers with signed saturation
INST3(vpmovsqb,         "pmovsqb",          IUM_WR, PSSE38(0xF3, 0x22),     BAD_CODE,     PSSE38(0xF3, 0x22),            INS_TT_EIGHTH_MEM,                   Input_64Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert packed 64-bit integers to 8-bit integers with signed saturation
INST3(vpmovsqd,         "pmovsqd",          IUM_WR, PSSE38(0xF3, 0x25),     BAD_CODE,     PSSE38(0xF3, 0x25),            INS_TT_HALF_MEM,                     Input_64Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert packed 64-bit integers to 32-bit integers with signed saturation
INST3(vpmovsqw,         "pmovsqw",          IUM_WR, PSSE38(0xF3, 0x24),     BAD_CODE,     PSSE38(0xF3, 0x24),            INS_TT_QUARTER_MEM,                  Input_64Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert packed 64-bit integers to 16-bit integers with signed saturation
INST3(vpmovusdb,        "pmovusdb",         IUM_WR, PSSE38(0xF3, 0x11),     BAD_CODE,     PSSE38(0xF3, 0x11),            INS_TT_QUARTER_MEM,                  Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert packed 32-bit integers to 8-bit integers with unsigned saturation
INST3(vpmovusdw,        "pmovusdw",         IUM_WR, PSSE38(0xF3, 0x13),     BAD_CODE,     PSSE38(0xF3, 0x13),            INS_TT_HALF_MEM,                     Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert packed 32-bit integers to 16-bit integers with unsigned saturation
INST3(vpmovusqb,        "pmovusqb",         IUM_WR, PSSE38(0xF3, 0x12),     BAD_CODE,     PSSE38(0xF3, 0x12),            INS_TT_EIGHTH_MEM,                   Input_64Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert packed 64-bit integers to 8-bit integers with unsigned saturation
INST3(vpmovusqd,        "pmovusqd",         IUM_WR, PSSE38(0xF3, 0x15),     BAD_CODE,     PSSE38(0xF3, 0x15),            INS_TT_HALF_MEM,                     Input_64Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert packed 64-bit integers to 32-bit integers with unsigned saturation
INST3(vpmovusqw,        "pmovusqw",         IUM_WR, PSSE38(0xF3, 0x14),     BAD_CODE,     PSSE38(0xF3, 0x14),            INS_TT_QUARTER_MEM,                  Input_64Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert packed 64-bit integers to 16-bit integers with unsigned saturation
INST3(vporq,            "porq",             IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0xEB),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Packed bit-wise OR of two xmm regs
INST3(vprold,           "prold",            IUM_WR, BAD_CODE,               PCKDBL(0x72), BAD_CODE,                      INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Bit rotate left of 32-bit integers by an immediate count
INST3(vprolq,           "prolq",            IUM_WR, BAD_CODE,               PCKDBL(0x72), BAD_CODE,                      INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Bit rotate left of 64-bit integers by an immediate count
INST3(vprolvd,          "prolvd",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x15),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Bit rotate left of 32-bit integers by per-element variable counts
INST3(vprolvq,          "prolvq",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x15),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Bit rotate left of 64-bit integers by per-element variable counts
INST3(vprord,           "prord",            IUM_WR, BAD_CODE,               PCKDBL(0x72), BAD_CODE,                      INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Bit rotate right of 32-bit integers by an immediate count
INST3(vprorq,           "prorq",            IUM_WR, BAD_CODE,               PCKDBL(0x72), BAD_CODE,                      INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Bit rotate right of 64-bit integers by an immediate count
INST3(vprorvd,          "prorvd",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x14),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Bit rotate right of 32-bit integers by per-element variable counts
INST3(vprorvq,          "prorvq",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x14),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Bit rotate right of 64-bit integers by per-element variable counts
INST3(vpsraq,           "psraq",            IUM_WR, BAD_CODE,               PCKDBL(0x72), PCKDBL(0xE2),                  INS_TT_FULL     | INS_TT_MEM128,     Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed shift right arithmetic of 64-bit integers
INST3(vpsravq,          "psravq",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x46),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Variable Bit Shift Right Arithmetic
INST3(vpternlogd,       "pternlogd",        IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x25),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Bitwise Ternary Logic
INST3(vpternlogq,       "pternlogq",        IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x25),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Bitwise Ternary Logic
INST3(vptestmd,         "ptestmd",          IUM_RD, BAD_CODE,               BAD_CODE,     SSE38(0x27),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Logical AND of packed 32-bit integers and set mask
INST3(vptestmq,         "ptestmq",          IUM_RD, BAD_CODE,               BAD_CODE,     SSE38(0x27),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Logical AND of packed 64-bit integers and set mask
INST3(vptestnmd,        "ptestnmd",         IUM_RD, BAD_CODE,               BAD_CODE,     PSSE38(0xF3, 0x27),            INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Logical NAND of packed 32-bit integers and set mask
INST3(vptestnmq,        "ptestnmq",         IUM_RD, BAD_CODE,               BAD_CODE,     PSSE38(0xF3, 0x27),            INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Logical NAND of packed 64-bit integers and set mask
INST3(vpxorq,           "pxorq",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0xEF),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Packed bit-wise XOR of two xmm regs
INST3(vrangepd,         "rangepd",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x50),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Range restriction calculation from a pair of packed double-precision floating-point values
INST3(vrangeps,         "rangeps",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x50),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Range restriction calculation from a pair of packed single-precision floating-point values
INST3(vrangesd,         "rangesd",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x51),                   INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Range restriction calculation from a pair of scalar double-precision floating-point values
INST3(vrangess,         "rangess",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x51),                   INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Range restriction calculation from a pair of scalar single-precision floating-point values
INST3(vrcp14pd,         "rcp14pd",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x4C),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Compute approximate reciprocals of packed double-precision floating-point values
INST3(vrcp14ps,         "rcp14ps",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x4C),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Compute approximate reciprocals of packed single-precision floating-point values
INST3(vrcp14sd,         "rcp14sd",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x4D),                   INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Compute approximate reciprocal of scalar double-precision floating-point value
INST3(vrcp14ss,         "rcp14ss",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x4D),                   INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Compute approximate reciprocal of scalar single-precision floating-point value
INST3(vreducepd,        "reducepd",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x56),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Perform a reduction transformation on packed double-precision floating-point values
INST3(vreduceps,        "reduceps",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x56),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Perform a reduction transformation on packed single-precision floating-point values
INST3(vreducesd,        "reducesd",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x57),                   INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Perform a reduction transformation on scalar double-precision floating-point value
INST3(vreducess,        "reducess",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x57),                   INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Perform a reduction transformation on scalar single-precision floating-point value
INST3(vrndscalepd,      "rndscalepd",       IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x09),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Round packed double-precision floating-point values to include a given number of fraction bits
INST3(vrndscaleps,      "rndscaleps",       IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x08),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Round packed single-precision floating-point values to include a given number of fraction bits
INST3(vrndscalesd,      "rndscalesd",       IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x0B),                   INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Round scalar double-precision floating-point value to include a given number of fraction bits
INST3(vrndscaless,      "rndscaless",       IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x0A),                   INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Round scalar single-precision floating-point value to include a given number of fraction bits
INST3(vrsqrt14pd,       "rsqrt14pd",        IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x4E),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Compute approximate reciprocals of square roots of packed double-precision floating-point values
INST3(vrsqrt14ps,       "rsqrt14ps",        IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x4E),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Compute approximate reciprocals of square roots of packed single-precision floating-point values
INST3(vrsqrt14sd,       "rsqrt14sd",        IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x4F),                   INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Compute approximate reciprocal of square root of scalar double-precision floating-point value
INST3(vrsqrt14ss,       "rsqrt14ss",        IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x4F),                   INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Compute approximate reciprocal of square root of scalar single-precision floating-point value
INST3(vscalefpd,        "scalefpd",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x2C),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Scale packed double-precision floating-point values
INST3(vscalefps,        "scalefps",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x2C),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Scale packed single-precision floating-point values
INST3(vscalefsd,        "scalefsd",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x2D),                   INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Scale scalar double-precision floating-point value
INST3(vscalefss,        "scalefss",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x2D),                   INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Scale scalar single-precision floating-point value
INST3(vshuff32x4,       "shuff32x4",        IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x23),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Shuffle packed single-precision floating-point values at 128-bit granularity
INST3(vshuff64x2,       "shuff64x2",        IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x23),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Shuffle packed double-precision floating-point values at 128-bit granularity
INST3(vshufi32x4,       "shufi32x4",        IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x43),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Shuffle packed 32-bit integer values at 128-bit granularity
INST3(vshufi64x2,       "shufi64x2",        IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x43),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Shuffle packed 64-bit integer values at 128-bit granularity

// AVX512BW
// Opmask (k-register) instructions, 32-bit (d) and 64-bit (q) forms. Every row below is VEX-encoded and carries KInstruction.
// NOTE(review): kunpckdq/kunpckwd are marked IUM_RD like the flag-only kortest*/ktest* rows; confirm whether IUM_WR was intended.
INST3(kaddd,            "kaddd",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x4A),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Add two masks
INST3(kaddq,            "kaddq",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x4A),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Add two masks
INST3(kandd,            "kandd",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x41),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical AND masks
INST3(kandq,            "kandq",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x41),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical AND masks
INST3(kandnd,           "kandnd",           IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x42),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical AND NOT masks
INST3(kandnq,           "kandnq",           IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x42),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical AND NOT masks
INST3(kmovd_gpr,        "kmovd",            IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x92),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Move from and to mask registers
INST3(kmovd_msk,        "kmovd",            IUM_WR, PCKDBL(0x91),           BAD_CODE,     PCKDBL(0x90),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Move from and to mask registers
INST3(kmovq_gpr,        "kmovq",            IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x92),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Move from and to mask registers
INST3(kmovq_msk,        "kmovq",            IUM_WR, PCKFLT(0x91),           BAD_CODE,     PCKFLT(0x90),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Move from and to mask registers
INST3(knotd,            "knotd",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x44),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // NOT mask register
INST3(knotq,            "knotq",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x44),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // NOT mask register
INST3(kord,             "kord",             IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x45),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical OR masks
INST3(korq,             "korq",             IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x45),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical OR masks
INST3(kortestd,         "kortestd",         IUM_RD, BAD_CODE,               BAD_CODE,     PCKDBL(0x98),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX | Resets_OF      | Resets_SF     | Writes_ZF     | Resets_AF     | Resets_PF     | Writes_CF      | KInstruction)                  // OR masks and set flags
INST3(kortestq,         "kortestq",         IUM_RD, BAD_CODE,               BAD_CODE,     PCKFLT(0x98),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX | Resets_OF      | Resets_SF     | Writes_ZF     | Resets_AF     | Resets_PF     | Writes_CF      | KInstruction)                  // OR masks and set flags
INST3(kshiftld,         "kshiftld",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x33),                   INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Shift left mask registers
INST3(kshiftlq,         "kshiftlq",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x33),                   INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Shift left mask registers
INST3(kshiftrd,         "kshiftrd",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x31),                   INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Shift right mask registers
INST3(kshiftrq,         "kshiftrq",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x31),                   INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Shift right mask registers
INST3(ktestd,           "ktestd",           IUM_RD, BAD_CODE,               BAD_CODE,     PCKDBL(0x99),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX | Resets_OF      | Resets_SF     | Writes_ZF     | Resets_AF     | Resets_PF     | Writes_CF      | KInstruction)                  // Packed bit test masks and set flags
INST3(ktestq,           "ktestq",           IUM_RD, BAD_CODE,               BAD_CODE,     PCKFLT(0x99),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX | Resets_OF      | Resets_SF     | Writes_ZF     | Resets_AF     | Resets_PF     | Writes_CF      | KInstruction)                  // Packed bit test masks and set flags
INST3(kunpckdq,         "kunpckdq",         IUM_RD, BAD_CODE,               BAD_CODE,     PCKFLT(0x4B),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Unpack for mask registers
INST3(kunpckwd,         "kunpckwd",         IUM_RD, BAD_CODE,               BAD_CODE,     PCKFLT(0x4B),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Unpack for mask registers
INST3(kxnord,           "kxnord",           IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x46),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical XNOR masks
INST3(kxnorq,           "kxnorq",           IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x46),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical XNOR masks
INST3(kxord,            "kxord",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x47),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical XOR masks
INST3(kxorq,            "kxorq",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x47),                  INS_TT_NONE,                                          REX_W1                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical XOR masks
INST3(vpblendmd,        "pblendmd",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x64),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Blend Int32 vectors using an OpMask control
INST3(vpblendmw,        "pblendmw",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x66),                   INS_TT_FULL_MEM,                     Input_16Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Blend Word vectors using an OpMask control
INST3(vdbpsadbw,        "dbpsadbw",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x42),                   INS_TT_FULL_MEM,                     Input_8Bit     | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Double block packed Sum-Absolute-Differences (SAD) on unsigned bytes
// vmovdqu8/vmovdqu16 are encoded with the F2 prefix (SSEDBL); the F3 prefix (SSEFLT) with the same opcode bytes encodes vmovdqu32/vmovdqu64.
INST3(vmovdqu8,         "movdqu8",          IUM_WR, SSEDBL(0x7F),           BAD_CODE,     SSEDBL(0x6F),                  INS_TT_FULL_MEM,                     Input_8Bit     | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Move unaligned packed 8-bit integer values
INST3(vmovdqu16,        "movdqu16",         IUM_WR, SSEDBL(0x7F),           BAD_CODE,     SSEDBL(0x6F),                  INS_TT_FULL_MEM,                     Input_16Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Move unaligned packed 16-bit integer values
INST3(vpbroadcastb_gpr, "pbroadcastb",      IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x7A),                   INS_TT_TUPLE1_SCALAR,                Input_8Bit     | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Broadcast int8 value from gpr to entire register
INST3(vpbroadcastw_gpr, "pbroadcastw",      IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x7B),                   INS_TT_TUPLE1_SCALAR,                Input_16Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Broadcast int16 value from gpr to entire register
INST3(vpcmpb,           "pcmpb",            IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x3F),                   INS_TT_FULL_MEM,                     Input_8Bit     | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed compare 8-bit signed integers using an immediate predicate
INST3(vpcmpeqb,         "pcmpeqb",          IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x74),                  INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG                      | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                          // Packed compare 8-bit integers for equality
INST3(vpcmpeqw,         "pcmpeqw",          IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x75),                  INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG                      | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                          // Packed compare 16-bit integers for equality
INST3(vpcmpgtb,         "pcmpgtb",          IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x64),                  INS_TT_FULL_MEM,                     Input_8Bit     | REX_WIG                      | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                          // Packed compare 8-bit signed integers for greater than
INST3(vpcmpgtw,         "pcmpgtw",          IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x65),                  INS_TT_FULL_MEM,                     Input_16Bit    | REX_WIG                      | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                          // Packed compare 16-bit signed integers for greater than
INST3(vpcmpw,           "pcmpw",            IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x3F),                   INS_TT_FULL_MEM,                     Input_16Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed compare 16-bit signed integers using an immediate predicate
INST3(vpcmpub,          "pcmpub",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x3E),                   INS_TT_FULL_MEM,                     Input_8Bit     | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed compare 8-bit unsigned integers using an immediate predicate
INST3(vpcmpuw,          "pcmpuw",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x3E),                   INS_TT_FULL_MEM,                     Input_16Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed compare 16-bit unsigned integers using an immediate predicate
INST3(vpermw,           "permw",            IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x8D),                   INS_TT_FULL_MEM,                     Input_16Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Permute Packed Word Elements
INST3(vpermi2w,         "permi2w",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x75),                   INS_TT_FULL_MEM,                     Input_16Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Full Permute From Two Tables Overwriting the Index
INST3(vpermt2w,         "permt2w",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x7D),                   INS_TT_FULL_MEM,                     Input_16Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Full Permute From Two Tables Overwriting one Table
INST3(vpmovb2m,         "pmovb2m",          IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xF3, 0x29),            INS_TT_NONE,                         Input_8Bit     | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Convert a vector of 8-bit elements to a mask
INST3(vpmovm2b,         "pmovm2b",          IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xF3, 0x28),            INS_TT_NONE,                         Input_8Bit     | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Convert a mask to a vector of 8-bit elements
INST3(vpmovm2w,         "pmovm2w",          IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xF3, 0x28),            INS_TT_NONE,                         Input_16Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Convert a mask to a vector of 16-bit elements
INST3(vpmovw2m,         "pmovw2m",          IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xF3, 0x29),            INS_TT_NONE,                         Input_16Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Convert a vector of 16-bit elements to a mask
INST3(vpmovwb,          "pmovwb",           IUM_WR, PSSE38(0xF3, 0x30),     BAD_CODE,     PSSE38(0xF3, 0x30),            INS_TT_HALF_MEM,                     Input_16Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert word to byte with truncation
INST3(vpmovswb,         "pmovswb",          IUM_WR, PSSE38(0xF3, 0x20),     BAD_CODE,     PSSE38(0xF3, 0x20),            INS_TT_HALF_MEM,                     Input_16Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert word to byte with signed saturation
INST3(vpmovuswb,        "pmovuswb",         IUM_WR, PSSE38(0xF3, 0x10),     BAD_CODE,     PSSE38(0xF3, 0x10),            INS_TT_HALF_MEM,                     Input_16Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Down convert word to byte with unsigned saturation
INST3(vpsllvw,          "psllvw",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x12),                   INS_TT_FULL_MEM,                     Input_16Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Variable Bit Shift Left Logical
INST3(vpsravw,          "psravw",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x11),                   INS_TT_FULL_MEM,                     Input_16Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Variable Bit Shift Right Arithmetic
INST3(vpsrlvw,          "psrlvw",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x10),                   INS_TT_FULL_MEM,                     Input_16Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Variable Bit Shift Right Logical
// NOTE(review): the vptestm*/vptestnm* rows are marked IUM_RD although their comments say they set a mask; confirm the intended update mode.
INST3(vptestmb,         "ptestmb",          IUM_RD, BAD_CODE,               BAD_CODE,     SSE38(0x26),                   INS_TT_FULL_MEM,                     Input_8Bit     | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Logical AND and set mask
INST3(vptestmw,         "ptestmw",          IUM_RD, BAD_CODE,               BAD_CODE,     SSE38(0x26),                   INS_TT_FULL_MEM,                     Input_16Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Logical AND and set mask
INST3(vptestnmb,        "ptestnmb",         IUM_RD, BAD_CODE,               BAD_CODE,     PSSE38(0xF3, 0x26),            INS_TT_FULL_MEM,                     Input_8Bit     | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Logical NAND and set mask
INST3(vptestnmw,        "ptestnmw",         IUM_RD, BAD_CODE,               BAD_CODE,     PSSE38(0xF3, 0x26),            INS_TT_FULL_MEM,                     Input_16Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Logical NAND and set mask

// AVX512CD
// Each d/q pair below shares one opcode; the dword and qword rows differ only in Input_32Bit/REX_W0 versus Input_64Bit/REX_W1.
INST3(vpconflictd,      "pconflictd",       IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0xC4),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Detect conflicts within a vector of packed dword values into dense memory/register
INST3(vpconflictq,      "pconflictq",       IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0xC4),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Detect conflicts within a vector of packed qword values into dense memory/register
INST3(vplzcntd,         "plzcntd",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x44),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Count the number of leading zero bits for packed dword values
INST3(vplzcntq,         "plzcntq",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x44),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Count the number of leading zero bits for packed qword values

// AVX512DQ
// Opmask (k-register) instructions, 8-bit (b) forms plus the 16-bit kaddw/ktestw. Every row below is VEX-encoded with REX_W0 and carries KInstruction.
INST3(kaddb,            "kaddb",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x4A),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Add two masks
INST3(kaddw,            "kaddw",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x4A),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Add two masks
INST3(kandb,            "kandb",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x41),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical AND masks
INST3(kandnb,           "kandnb",           IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x42),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical AND NOT masks
INST3(kmovb_gpr,        "kmovb",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x92),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Move from and to mask registers
INST3(kmovb_msk,        "kmovb",            IUM_WR, PCKDBL(0x91),           BAD_CODE,     PCKDBL(0x90),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Move from and to mask registers
INST3(knotb,            "knotb",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x44),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // NOT mask register
INST3(korb,             "korb",             IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x45),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical OR masks
INST3(kortestb,         "kortestb",         IUM_RD, BAD_CODE,               BAD_CODE,     PCKDBL(0x98),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX | Resets_OF      | Resets_SF     | Writes_ZF     | Resets_AF     | Resets_PF     | Writes_CF      | KInstruction)                  // OR masks and set flags
INST3(kshiftlb,         "kshiftlb",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x32),                   INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Shift left mask registers
INST3(kshiftrb,         "kshiftrb",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x30),                   INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Shift right mask registers
INST3(ktestb,           "ktestb",           IUM_RD, BAD_CODE,               BAD_CODE,     PCKDBL(0x99),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX | Resets_OF      | Resets_SF     | Writes_ZF     | Resets_AF     | Resets_PF     | Writes_CF      | KInstruction)                  // Packed bit test masks and set flags
INST3(ktestw,           "ktestw",           IUM_RD, BAD_CODE,               BAD_CODE,     PCKFLT(0x99),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX | Resets_OF      | Resets_SF     | Writes_ZF     | Resets_AF     | Resets_PF     | Writes_CF      | KInstruction)                  // Packed bit test masks and set flags
INST3(kxnorb,           "kxnorb",           IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x46),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical XNOR masks
INST3(kxorb,            "kxorb",            IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x47),                  INS_TT_NONE,                                          REX_W0                       | Encoding_VEX                                                                                                   | KInstruction)                  // Bitwise logical XOR masks
// EVEX-only vector rows: tuple broadcasts, then packed conversions between floating-point values and 64-bit integers (qq = signed, uqq = unsigned).
INST3(vbroadcastf32x2,  "broadcastf32x2",   IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x19),                   INS_TT_TUPLE2,                       Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Broadcast packed float values read from memory to entire register
INST3(vbroadcasti32x2,  "broadcasti32x2",   IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x59),                   INS_TT_TUPLE2,                       Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Broadcast packed integer values read from memory to entire register
INST3(vbroadcastf32x8,  "broadcastf32x8",   IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x1B),                   INS_TT_TUPLE8,                       Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Broadcast packed float values read from memory to entire register
INST3(vbroadcasti32x8,  "broadcasti32x8",   IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x5B),                   INS_TT_TUPLE8,                       Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Broadcast packed integer values read from memory to entire register
INST3(vcvtpd2qq,        "cvtpd2qq",         IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x7B),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt packed doubles to   signed QWORDs
INST3(vcvtpd2uqq,       "cvtpd2uqq",        IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x79),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt packed doubles to unsigned QWORDs
INST3(vcvtps2qq,        "cvtps2qq",         IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x7B),                  INS_TT_HALF,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt packed singles to   signed QWORDs
INST3(vcvtps2uqq,       "cvtps2uqq",        IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x79),                  INS_TT_HALF,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt packed singles to unsigned QWORDs
INST3(vcvtqq2pd,        "cvtqq2pd",         IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0xE6),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt packed signed QWORDs to doubles
INST3(vcvtqq2ps,        "cvtqq2ps",         IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x5B),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt packed signed QWORDs to singles
INST3(vcvttpd2qq,       "cvttpd2qq",        IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x7A),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation packed doubles to   signed QWORDs
INST3(vcvttpd2uqq,      "cvttpd2uqq",       IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x78),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation packed doubles to unsigned QWORDs
INST3(vcvttps2qq,       "cvttps2qq",        IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x7A),                  INS_TT_HALF,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation packed singles to   signed QWORDs
INST3(vcvttps2uqq,      "cvttps2uqq",       IUM_WR, BAD_CODE,               BAD_CODE,     PCKDBL(0x78),                  INS_TT_HALF,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation packed singles to unsigned QWORDs
INST3(vcvtuqq2pd,       "cvtuqq2pd",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x7A),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt packed signed QWORDs to doubles
INST3(vcvtuqq2ps,       "cvtuqq2ps",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x7A),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt packed signed QWORDs to singles
// Naming: <f|i><element bits>x<element count>. The 32x8 forms move 8 x 32-bit = 256 bits, the 64x2 forms move 2 x 64-bit = 128 bits
// (this matches the INS_TT_TUPLE8/Input_32Bit and INS_TT_TUPLE2/Input_64Bit columns below).
INST3(vextractf32x8,    "extractf32x8",     IUM_WR, SSE3A(0x1B),            BAD_CODE,     BAD_CODE,                      INS_TT_TUPLE8,                       Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Extract 256-bit packed single-precision floating point values
INST3(vextractf64x2,    "extractf64x2",     IUM_WR, SSE3A(0x19),            BAD_CODE,     BAD_CODE,                      INS_TT_TUPLE2,                       Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Extract 128-bit packed double-precision floating point values
INST3(vextracti32x8,    "extracti32x8",     IUM_WR, SSE3A(0x3B),            BAD_CODE,     BAD_CODE,                      INS_TT_TUPLE8,                       Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Extract 256-bit packed doubleword integer values
INST3(vextracti64x2,    "extracti64x2",     IUM_WR, SSE3A(0x39),            BAD_CODE,     BAD_CODE,                      INS_TT_TUPLE2,                       Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // Extract 128-bit packed quadword integer values
INST3(vinsertf32x8,     "insertf32x8",      IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x1A),                   INS_TT_TUPLE8,                       Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Insert 256-bit packed single-precision floating point values
INST3(vinsertf64x2,     "insertf64x2",      IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x18),                   INS_TT_TUPLE2,                       Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Insert 128-bit packed double-precision floating point values
INST3(vinserti32x8,     "inserti32x8",      IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x3A),                   INS_TT_TUPLE8,                       Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Insert 256-bit packed doubleword integer values
INST3(vinserti64x2,     "inserti64x2",      IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x38),                   INS_TT_TUPLE2,                       Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Insert 128-bit packed quadword integer values
// Packed integer compares (d/q = signed DWORD/QWORD, ud/uq = unsigned DWORD/QWORD) that write their result to a mask register;
// the comparison predicate is supplied as an immediate (see Intel SDM, VPCMPD/VPCMPUD and VPCMPQ/VPCMPUQ).
INST3(vpcmpd,           "pcmpd",            IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x1F),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_Is3OperandInstructionMask)
INST3(vpcmpq,           "pcmpq",            IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x1F),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_Is3OperandInstructionMask)
INST3(vpcmpud,          "pcmpud",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x1E),                   INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX  | INS_Flags_Is3OperandInstructionMask)
INST3(vpcmpuq,          "pcmpuq",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE3A(0x1E),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_Is3OperandInstructionMask)
// Conversions between mask and vector registers: d2m/q2m = vector -> mask, m2d/m2q = mask -> vector.
// Each direction shares one opcode; the DWORD and QWORD variants differ only in REX_W0 vs REX_W1.
INST3(vpmovd2m,         "pmovd2m",          IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xF3, 0x39),            INS_TT_NONE,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)
INST3(vpmovm2d,         "pmovm2d",          IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xF3, 0x38),            INS_TT_NONE,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)
INST3(vpmovm2q,         "pmovm2q",          IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xF3, 0x38),            INS_TT_NONE,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)
INST3(vpmovq2m,         "pmovq2m",          IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xF3, 0x39),            INS_TT_NONE,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)
INST3(vpmullq,          "pmullq",           IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x40),                   INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Packed multiply 64 bit unsigned integers and store lower 64 bits of each result

// AVX512VBMI
INST3(vpermb,           "permb",            IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x8D),                   INS_TT_FULL_MEM,                     Input_8Bit     | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Permute Packed Byte Elements
INST3(vpermi2b,         "permi2b",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x75),                   INS_TT_FULL_MEM,                     Input_8Bit     | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Full Permute of Bytes from Two Tables Overwriting the Index
INST3(vpermt2b,         "permt2b",          IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0x7D),                   INS_TT_FULL_MEM,                     Input_8Bit     | REX_W0                       | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Full Permute of Bytes from Two Tables Overwriting one Table

// Sentinel entry, not a real instruction: every encoding column is BAD_CODE and it carries no flags.
// NOTE(review): presumably marks the end of the AVX-512 range for enum range checks, so new AVX-512
// instructions would need to be added above this line -- confirm against the code that consumes it.
INST3(LAST_AVX512_INSTRUCTION, "LAST_AVX512_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)

// Scalar instructions in SSE4.2
INST3(crc32,            "crc32",            IUM_RW, BAD_CODE,     BAD_CODE,     PSSE38(0xF2, 0xF0),                      INS_TT_NONE,    INS_FLAGS_None)    // Accumulate CRC32 Value (destination is both read and written, hence IUM_RW)

// BMI1
INST3(tzcnt,            "tzcnt",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0xBC),                            INS_TT_NONE,    Undefined_OF   | Undefined_SF  | Writes_ZF     | Undefined_AF  | Undefined_PF  | Writes_CF)    // Count the Number of Trailing Zero Bits

// LZCNT
INST3(lzcnt,            "lzcnt",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0xBD),                            INS_TT_NONE,    Undefined_OF   | Undefined_SF  | Writes_ZF     | Undefined_AF  | Undefined_PF  | Writes_CF)    // Count the Number of Leading Zero Bits

// MOVBE
INST3(movbe,            "movbe",            IUM_WR, PCKMVB(0xF1), BAD_CODE,     PCKMVB(0xF0),                            INS_TT_NONE,    INS_FLAGS_None)    // Move Data After Swapping Bytes (mr = store form, rm = load form)

// POPCNT
INST3(popcnt,           "popcnt",           IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0xB8),                            INS_TT_NONE,    Resets_OF      | Resets_SF     | Writes_ZF     | Resets_AF     | Resets_PF     | Resets_CF)    // Return the Count of Number of Bits Set to 1

//    id                nm                  um      mr            mi                                                     tt              flags
INST2(ret,              "ret",              IUM_RD, 0x0000C3,     0x0000C2,                                              INS_TT_NONE,    INS_FLAGS_None)
INST2(loop,             "loop",             IUM_RD, BAD_CODE,     0x0000E2,                                              INS_TT_NONE,    INS_FLAGS_None)
INST2(call,             "call",             IUM_RD, 0x0010FF,     0x0000E8,                                              INS_TT_NONE,    INS_FLAGS_None)

// Rotates and shifts: each operation has three entries that share one display name.
//   <op>    -- opcode D2, no immediate form (mi is BAD_CODE); the count comes from CL
//   <op>_1  -- opcode D0, count of one; the only form for which OF is defined (Writes_OF)
//   <op>_N  -- opcode C0, count given as an immediate
// The second encoding byte holds the ModRM /digit selecting the operation:
// rol=/0, ror=/1, rcl=/2, rcr=/3, shl=/4, shr=/5, sar=/7 (see Intel SDM, RCL/RCR/ROL/ROR and SAL/SAR/SHL/SHR).
INST2(rol,              "rol",              IUM_RW, 0x0000D2,     BAD_CODE,                                              INS_TT_NONE,    Undefined_OF                                                                   | Writes_CF             | INS_FLAGS_Has_Wbit)
INST2(rol_1,            "rol",              IUM_RW, 0x0000D0,     0x0000D0,                                              INS_TT_NONE,    Writes_OF                                                                      | Writes_CF             | INS_FLAGS_Has_Wbit)
INST2(rol_N,            "rol",              IUM_RW, 0x0000C0,     0x0000C0,                                              INS_TT_NONE,    Undefined_OF                                                                   | Writes_CF             | INS_FLAGS_Has_Wbit)
INST2(ror,              "ror",              IUM_RW, 0x0008D2,     BAD_CODE,                                              INS_TT_NONE,    Undefined_OF                                                                   | Writes_CF             | INS_FLAGS_Has_Wbit)
INST2(ror_1,            "ror",              IUM_RW, 0x0008D0,     0x0008D0,                                              INS_TT_NONE,    Writes_OF                                                                      | Writes_CF             | INS_FLAGS_Has_Wbit)
INST2(ror_N,            "ror",              IUM_RW, 0x0008C0,     0x0008C0,                                              INS_TT_NONE,    Undefined_OF                                                                   | Writes_CF             | INS_FLAGS_Has_Wbit)

// Rotates through carry additionally consume the incoming carry flag (Reads_CF).
INST2(rcl,              "rcl",              IUM_RW, 0x0010D2,     BAD_CODE,                                              INS_TT_NONE,    Undefined_OF                                                                   | Writes_CF | Reads_CF  | INS_FLAGS_Has_Wbit)
INST2(rcl_1,            "rcl",              IUM_RW, 0x0010D0,     0x0010D0,                                              INS_TT_NONE,    Writes_OF                                                                      | Writes_CF | Reads_CF  | INS_FLAGS_Has_Wbit)
INST2(rcl_N,            "rcl",              IUM_RW, 0x0010C0,     0x0010C0,                                              INS_TT_NONE,    Undefined_OF                                                                   | Writes_CF | Reads_CF  | INS_FLAGS_Has_Wbit)
INST2(rcr,              "rcr",              IUM_RW, 0x0018D2,     BAD_CODE,                                              INS_TT_NONE,    Undefined_OF                                                                   | Writes_CF | Reads_CF  | INS_FLAGS_Has_Wbit)
INST2(rcr_1,            "rcr",              IUM_RW, 0x0018D0,     0x0018D0,                                              INS_TT_NONE,    Writes_OF                                                                      | Writes_CF | Reads_CF  | INS_FLAGS_Has_Wbit)
INST2(rcr_N,            "rcr",              IUM_RW, 0x0018C0,     0x0018C0,                                              INS_TT_NONE,    Undefined_OF                                                                   | Writes_CF | Reads_CF  | INS_FLAGS_Has_Wbit)
INST2(shl,              "shl",              IUM_RW, 0x0020D2,     BAD_CODE,                                              INS_TT_NONE,    Undefined_OF   | Writes_SF     | Writes_ZF     | Undefined_AF  | Writes_PF     | Writes_CF             | INS_FLAGS_Has_Wbit)
INST2(shl_1,            "shl",              IUM_RW, 0x0020D0,     0x0020D0,                                              INS_TT_NONE,    Writes_OF      | Writes_SF     | Writes_ZF     | Undefined_AF  | Writes_PF     | Writes_CF             | INS_FLAGS_Has_Wbit)
INST2(shl_N,            "shl",              IUM_RW, 0x0020C0,     0x0020C0,                                              INS_TT_NONE,    Undefined_OF   | Writes_SF     | Writes_ZF     | Undefined_AF  | Writes_PF     | Writes_CF             | INS_FLAGS_Has_Wbit)
INST2(shr,              "shr",              IUM_RW, 0x0028D2,     BAD_CODE,                                              INS_TT_NONE,    Undefined_OF   | Writes_SF     | Writes_ZF     | Undefined_AF  | Writes_PF     | Writes_CF             | INS_FLAGS_Has_Wbit)
INST2(shr_1,            "shr",              IUM_RW, 0x0028D0,     0x0028D0,                                              INS_TT_NONE,    Writes_OF      | Writes_SF     | Writes_ZF     | Undefined_AF  | Writes_PF     | Writes_CF             | INS_FLAGS_Has_Wbit)
INST2(shr_N,            "shr",              IUM_RW, 0x0028C0,     0x0028C0,                                              INS_TT_NONE,    Undefined_OF   | Writes_SF     | Writes_ZF     | Undefined_AF  | Writes_PF     | Writes_CF             | INS_FLAGS_Has_Wbit)
INST2(sar,              "sar",              IUM_RW, 0x0038D2,     BAD_CODE,                                              INS_TT_NONE,    Undefined_OF   | Writes_SF     | Writes_ZF     | Undefined_AF  | Writes_PF     | Writes_CF             | INS_FLAGS_Has_Wbit)
INST2(sar_1,            "sar",              IUM_RW, 0x0038D0,     0x0038D0,                                              INS_TT_NONE,    Writes_OF      | Writes_SF     | Writes_ZF     | Undefined_AF  | Writes_PF     | Writes_CF             | INS_FLAGS_Has_Wbit)
INST2(sar_N,            "sar",              IUM_RW, 0x0038C0,     0x0038C0,                                              INS_TT_NONE,    Undefined_OF   | Writes_SF     | Writes_ZF     | Undefined_AF  | Writes_PF     | Writes_CF             | INS_FLAGS_Has_Wbit)


//    id                nm                  um      mr                                                                   tt              flags
// String moves/stores. All of them read the direction flag (Reads_DF). The byte/dword forms carry
// INS_FLAGS_Has_Wbit; the AMD64-only qword forms instead have the 0x48 (REX.W) byte baked into the
// encoding value and do not set INS_FLAGS_Has_Wbit. The r_ entries include the 0xF3 (rep) prefix byte.
INST1(r_movsb,          "rep movsb",        IUM_RD, 0x00A4F3,                                                            INS_TT_NONE,    Reads_DF | INS_FLAGS_Has_Wbit)
INST1(r_movsd,          "rep movsd",        IUM_RD, 0x00A5F3,                                                            INS_TT_NONE,    Reads_DF | INS_FLAGS_Has_Wbit)
#if defined(TARGET_AMD64)
INST1(r_movsq,          "rep movsq",        IUM_RD, 0xF3A548,                                                            INS_TT_NONE,    Reads_DF)
#endif // defined(TARGET_AMD64)
INST1(movsb,            "movsb",            IUM_RD, 0x0000A4,                                                            INS_TT_NONE,    Reads_DF   | INS_FLAGS_Has_Wbit)
INST1(movsd,            "movsd",            IUM_RD, 0x0000A5,                                                            INS_TT_NONE,    Reads_DF   | INS_FLAGS_Has_Wbit)
#if defined(TARGET_AMD64)
INST1(movsq,            "movsq",            IUM_RD, 0x00A548,                                                            INS_TT_NONE,    Reads_DF)
#endif // defined(TARGET_AMD64)

INST1(r_stosb,          "rep stosb",        IUM_RD, 0x00AAF3,                                                            INS_TT_NONE,    Reads_DF   | INS_FLAGS_Has_Wbit)
INST1(r_stosd,          "rep stosd",        IUM_RD, 0x00ABF3,                                                            INS_TT_NONE,    Reads_DF   | INS_FLAGS_Has_Wbit)
#if defined(TARGET_AMD64)
INST1(r_stosq,          "rep stosq",        IUM_RD, 0xF3AB48,                                                            INS_TT_NONE,    Reads_DF)
#endif // defined(TARGET_AMD64)
INST1(stosb,            "stosb",            IUM_RD, 0x0000AA,                                                            INS_TT_NONE,    Reads_DF   | INS_FLAGS_Has_Wbit)
INST1(stosd,            "stosd",            IUM_RD, 0x0000AB,                                                            INS_TT_NONE,    Reads_DF   | INS_FLAGS_Has_Wbit)
#if defined(TARGET_AMD64)
INST1(stosq,            "stosq",            IUM_RD, 0x00AB48,                                                            INS_TT_NONE,    Reads_DF)
#endif // defined(TARGET_AMD64)

// Single-byte instructions and prefixes that take no operands in this table.
INST1(int3,             "int3",             IUM_RD, 0x0000CC,                                                            INS_TT_NONE,    INS_FLAGS_None)
INST1(nop,              "nop",              IUM_RD, 0x000090,                                                            INS_TT_NONE,    INS_FLAGS_None)
INST1(pause,            "pause",            IUM_RD, 0x0090F3,                                                            INS_TT_NONE,    INS_FLAGS_None)
INST1(lock,             "lock",             IUM_RD, 0x0000F0,                                                            INS_TT_NONE,    INS_FLAGS_None)
INST1(leave,            "leave",            IUM_RD, 0x0000C9,                                                            INS_TT_NONE,    INS_FLAGS_None)

INST1(serialize,        "serialize",        IUM_RD, 0x0fe801,                                                            INS_TT_NONE,    INS_FLAGS_None)

// Opcode F6 group: the second encoding byte holds the ModRM /digit (not=/2, neg=/3; mul=/4, imul=/5, div=/6, idiv=/7 below).
INST1(neg,              "neg",              IUM_RW, 0x0018F6,                                                            INS_TT_NONE,    Writes_OF      | Writes_SF     | Writes_ZF     | Writes_AF     | Writes_PF     | Writes_CF     | INS_FLAGS_Has_Wbit)
INST1(not,              "not",              IUM_RW, 0x0010F6,                                                            INS_TT_NONE,    INS_FLAGS_None | INS_FLAGS_Has_Wbit)

INST1(cwde,             "cwde",             IUM_RD, 0x000098,                                                            INS_TT_NONE,    INS_FLAGS_None)
INST1(cdq,              "cdq",              IUM_RD, 0x000099,                                                            INS_TT_NONE,    INS_FLAGS_None)
// One-operand multiply/divide forms. The divides leave all six status flags undefined;
// the multiplies define only OF and CF.
INST1(idiv,             "idiv",             IUM_RD, 0x0038F6,                                                            INS_TT_NONE,    Undefined_OF   | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Undefined_CF  | INS_FLAGS_Has_Wbit)
INST1(imulEAX,          "imul",             IUM_RD, 0x0028F6,                                                            INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Wbit)
INST1(div,              "div",              IUM_RD, 0x0030F6,                                                            INS_TT_NONE,    Undefined_OF   | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Undefined_CF  | INS_FLAGS_Has_Wbit)
INST1(mulEAX,           "mul",              IUM_RD, 0x0020F6,                                                            INS_TT_NONE,    Writes_OF      | Undefined_SF  | Undefined_ZF  | Undefined_AF  | Undefined_PF  | Writes_CF     | INS_FLAGS_Has_Wbit)

INST1(sahf,             "sahf",             IUM_RD, 0x00009E,                                                            INS_TT_NONE,    Restore_SF_ZF_AF_PF_CF)

// Read-modify-write instructions that update all six status flags.
INST1(xadd,             "xadd",             IUM_RW, 0x0F00C0,                                                            INS_TT_NONE,    Writes_OF      | Writes_SF     | Writes_ZF     | Writes_AF     | Writes_PF     | Writes_CF     | INS_FLAGS_Has_Wbit)
INST1(cmpxchg,          "cmpxchg",          IUM_RW, 0x0F00B0,                                                            INS_TT_NONE,    Writes_OF      | Writes_SF     | Writes_ZF     | Writes_AF     | Writes_PF     | Writes_CF     | INS_FLAGS_Has_Wbit)

// Double-precision shifts; unlike the single-operand shifts these entries do not set INS_FLAGS_Has_Wbit.
INST1(shld,             "shld",             IUM_RW, 0x0F00A4,                                                            INS_TT_NONE,    Undefined_OF   | Writes_SF     | Writes_ZF     | Undefined_AF  | Writes_PF     | Writes_CF)
INST1(shrd,             "shrd",             IUM_RW, 0x0F00AC,                                                            INS_TT_NONE,    Undefined_OF   | Writes_SF     | Writes_ZF     | Undefined_AF  | Writes_PF     | Writes_CF)

// For RyuJIT/x86, we follow the x86 calling convention, which requires
// floating-point values to be returned on the x87 FP stack, so we need
// these instructions regardless of whether we're using full stack fp.
// Both entries are tagged INS_FLAGS_x87Instr and exist only when TARGET_X86 is defined.
#ifdef TARGET_X86
INST1(fld,              "fld",              IUM_WR, 0x0000D9,                                                            INS_TT_NONE,    INS_FLAGS_x87Instr)
INST1(fstp,             "fstp",             IUM_WR, 0x0018D9,                                                            INS_TT_NONE,    INS_FLAGS_x87Instr)
#endif // TARGET_X86

// SETcc family (encodings 0F 90 through 0F 9F, in condition-code order).
// The flags column records which status flags each condition reads; none of these entries writes a flag.
INST1(seto,             "seto",             IUM_WR, 0x0F0090,                                                            INS_TT_NONE,    Reads_OF)
INST1(setno,            "setno",            IUM_WR, 0x0F0091,                                                            INS_TT_NONE,    Reads_OF)
INST1(setb,             "setb",             IUM_WR, 0x0F0092,                                                            INS_TT_NONE,    Reads_CF)
INST1(setae,            "setae",            IUM_WR, 0x0F0093,                                                            INS_TT_NONE,    Reads_CF)
INST1(sete,             "sete",             IUM_WR, 0x0F0094,                                                            INS_TT_NONE,    Reads_ZF)
INST1(setne,            "setne",            IUM_WR, 0x0F0095,                                                            INS_TT_NONE,    Reads_ZF)
INST1(setbe,            "setbe",            IUM_WR, 0x0F0096,                                                            INS_TT_NONE,    Reads_ZF | Reads_CF)
INST1(seta,             "seta",             IUM_WR, 0x0F0097,                                                            INS_TT_NONE,    Reads_ZF | Reads_CF)
INST1(sets,             "sets",             IUM_WR, 0x0F0098,                                                            INS_TT_NONE,    Reads_SF)
INST1(setns,            "setns",            IUM_WR, 0x0F0099,                                                            INS_TT_NONE,    Reads_SF)
INST1(setp,             "setp",             IUM_WR, 0x0F009A,                                                            INS_TT_NONE,    Reads_PF)
INST1(setnp,            "setnp",            IUM_WR, 0x0F009B,                                                            INS_TT_NONE,    Reads_PF)
INST1(setl,             "setl",             IUM_WR, 0x0F009C,                                                            INS_TT_NONE,    Reads_OF       | Reads_SF)
INST1(setge,            "setge",            IUM_WR, 0x0F009D,                                                            INS_TT_NONE,    Reads_OF       | Reads_SF)
INST1(setle,            "setle",            IUM_WR, 0x0F009E,                                                            INS_TT_NONE,    Reads_OF       | Reads_SF      | Reads_ZF)
INST1(setg,             "setg",             IUM_WR, 0x0F009F,                                                            INS_TT_NONE,    Reads_OF       | Reads_SF      | Reads_ZF)

// Indirect jump used for tailcalls. We differentiate between func-internal
// indirect jump (e.g. used for switch) and tailcall indirect jumps because the
// x64 unwinder might require the latter to be rex.w prefixed.
// Both entries share the same base encoding (0x0020FF) and differ only in id and display name.
INST1(tail_i_jmp,       "tail.jmp",         IUM_RD, 0x0020FF,                                                            INS_TT_NONE,    INS_FLAGS_None)
INST1(i_jmp,            "jmp",              IUM_RD, 0x0020FF,                                                            INS_TT_NONE,    INS_FLAGS_None)
// Short forms of the direct jumps: jmp is opcode EB and the conditional jumps are opcodes 70-7F
// (8-bit relative displacement per the Intel SDM). The flags column lists the status flags each condition reads.
// These entries use INST0 rather than INST1.
INST0(jmp,              "jmp",              IUM_RD, 0x0000EB,                                                            INS_TT_NONE,    INS_FLAGS_None)
INST0(jo,               "jo",               IUM_RD, 0x000070,                                                            INS_TT_NONE,    Reads_OF)
INST0(jno,              "jno",              IUM_RD, 0x000071,                                                            INS_TT_NONE,    Reads_OF)
INST0(jb,               "jb",               IUM_RD, 0x000072,                                                            INS_TT_NONE,    Reads_CF)
INST0(jae,              "jae",              IUM_RD, 0x000073,                                                            INS_TT_NONE,    Reads_CF)
INST0(je,               "je",               IUM_RD, 0x000074,                                                            INS_TT_NONE,    Reads_ZF)
INST0(jne,              "jne",              IUM_RD, 0x000075,                                                            INS_TT_NONE,    Reads_ZF)
INST0(jbe,              "jbe",              IUM_RD, 0x000076,                                                            INS_TT_NONE,    Reads_ZF | Reads_CF)
INST0(ja,               "ja",               IUM_RD, 0x000077,                                                            INS_TT_NONE,    Reads_ZF | Reads_CF)
INST0(js,               "js",               IUM_RD, 0x000078,                                                            INS_TT_NONE,    Reads_SF)
INST0(jns,              "jns",              IUM_RD, 0x000079,                                                            INS_TT_NONE,    Reads_SF)
INST0(jp,               "jp",               IUM_RD, 0x00007A,                                                            INS_TT_NONE,    Reads_PF)
INST0(jnp,              "jnp",              IUM_RD, 0x00007B,                                                            INS_TT_NONE,    Reads_PF)
INST0(jl,               "jl",               IUM_RD, 0x00007C,                                                            INS_TT_NONE,    Reads_OF       | Reads_SF)
INST0(jge,              "jge",              IUM_RD, 0x00007D,                                                            INS_TT_NONE,    Reads_OF       | Reads_SF)
INST0(jle,              "jle",              IUM_RD, 0x00007E,                                                            INS_TT_NONE,    Reads_OF       | Reads_SF      | Reads_ZF)
INST0(jg,               "jg",               IUM_RD, 0x00007F,                                                            INS_TT_NONE,    Reads_OF       | Reads_SF      | Reads_ZF)

// Long forms of the direct jumps (l_ prefix): jmp is opcode E9 and the conditional jumps are 0F 80 - 0F 8F
// (32-bit relative displacement per the Intel SDM). Display names and flag reads match the corresponding short forms.
INST0(l_jmp,            "jmp",              IUM_RD, 0x0000E9,                                                            INS_TT_NONE,    INS_FLAGS_None)
INST0(l_jo,             "jo",               IUM_RD, 0x00800F,                                                            INS_TT_NONE,    Reads_OF)
INST0(l_jno,            "jno",              IUM_RD, 0x00810F,                                                            INS_TT_NONE,    Reads_OF)
INST0(l_jb,             "jb",               IUM_RD, 0x00820F,                                                            INS_TT_NONE,    Reads_CF)
INST0(l_jae,            "jae",              IUM_RD, 0x00830F,                                                            INS_TT_NONE,    Reads_CF)
INST0(l_je,             "je",               IUM_RD, 0x00840F,                                                            INS_TT_NONE,    Reads_ZF)
INST0(l_jne,            "jne",              IUM_RD, 0x00850F,                                                            INS_TT_NONE,    Reads_ZF)
INST0(l_jbe,            "jbe",              IUM_RD, 0x00860F,                                                            INS_TT_NONE,    Reads_ZF | Reads_CF)
INST0(l_ja,             "ja",               IUM_RD, 0x00870F,                                                            INS_TT_NONE,    Reads_ZF | Reads_CF)
INST0(l_js,             "js",               IUM_RD, 0x00880F,                                                            INS_TT_NONE,    Reads_SF)
INST0(l_jns,            "jns",              IUM_RD, 0x00890F,                                                            INS_TT_NONE,    Reads_SF)
INST0(l_jp,             "jp",               IUM_RD, 0x008A0F,                                                            INS_TT_NONE,    Reads_PF)
INST0(l_jnp,            "jnp",              IUM_RD, 0x008B0F,                                                            INS_TT_NONE,    Reads_PF)
INST0(l_jl,             "jl",               IUM_RD, 0x008C0F,                                                            INS_TT_NONE,    Reads_OF       | Reads_SF)
INST0(l_jge,            "jge",              IUM_RD, 0x008D0F,                                                            INS_TT_NONE,    Reads_OF       | Reads_SF)
INST0(l_jle,            "jle",              IUM_RD, 0x008E0F,                                                            INS_TT_NONE,    Reads_OF       | Reads_SF      | Reads_ZF)
INST0(l_jg,             "jg",               IUM_RD, 0x008F0F,                                                            INS_TT_NONE,    Reads_OF       | Reads_SF      | Reads_ZF)

// align has no fixed encoding (BAD_CODE). NOTE(review): presumably a pseudo-instruction that the emitter
// expands into padding -- confirm against the emitter. data16 is the single prefix byte 0x66.
INST0(align,            "align",            IUM_RD, BAD_CODE,                                                            INS_TT_NONE,    INS_FLAGS_None)
INST0(data16,           "data16",           IUM_RD, 0x000066,                                                            INS_TT_NONE,    INS_FLAGS_None)

/*****************************************************************************/
// Remove every INSTn macro once the table has been expanded. Any later inclusion of this
// file must therefore define at least INST1 again (the check at the top of the file enforces it).
#undef  INST0
#undef  INST1
#undef  INST2
#undef  INST3
#undef  INST4
#undef  INST5
/*****************************************************************************/

// clang-format on
